diff --git a/.ipynb_checkpoints/preprocessing_functions-checkpoint.py b/.ipynb_checkpoints/preprocessing_functions-checkpoint.py new file mode 100644 index 0000000..2267e8b --- /dev/null +++ b/.ipynb_checkpoints/preprocessing_functions-checkpoint.py @@ -0,0 +1,208 @@ +import pandas as pd +from pathlib import Path +import numpy as np +import matplotlib.pyplot as plt +import h5py +import scipy.io +import spikeinterface as si +import spikeinterface.preprocessing as prep +import spikeinterface.widgets as sw +import spikeinterface.sorters as ss +import spikeinterface.exporters as exp +import probeinterface as pi +from spikeinterface.extractors import read_intan +from probeinterface import Probe, ProbeGroup +import os # Agregado + +# Configura Jupyter para usar widgets (esto debe estar en una celda separada en Jupyter) + +def read_rhd(rhd_folder): + rhd_folder_path = Path(rhd_folder) + rhd_files = list(rhd_folder_path.rglob('*.rhd')) # Usa rglob para buscar recursivamente si es necesario + recordings =[] + + if not rhd_files: + print("No se encontraron archivos .rhd en la carpeta.") + return None + else: + print(f"Leyendo {rhd_files}") + recordings = [read_intan(file, stream_name='RHD2000 amplifier channel') for file in rhd_files] + + if len(recordings) > 1: + recording = si.concatenate_recordings(recordings) + print(f"Concatenados {len(rhd_files)} archivos .rhd.") + else: + recording = recordings[0] + print(f"Procesado 1 archivo .rhd.") + + return recording + + +def get_recording(excel_file, probegroup_file): + try: + df = pd.read_excel(Path(excel_file)) + probegroup = pi.read_probeinterface(probegroup_file) + + recordings = [] + diferencias = [] + + for row in df.itertuples(index=False): + file_data_folder = Path(row.data_folder) + recording = read_rhd(file_data_folder) + + # frecuencia de sampleo registro original. + + sn=recording.get_num_samples() + td=recording.get_total_duration() + fs = (int(sn/ td)) + + if recording is None: + continue + + list_triggers, ms_before, ms_after = process_artifacts(row.artifacts, row.data_folder, fs) + + recording = prep.bandpass_filter(recording, freq_min=500., freq_max=9000.) 
+ recording = recording.set_probegroup(probegroup, group_mode='by_probe') + recording = prep.remove_artifacts( + recording=recording, + list_triggers=list_triggers, + mode="zeros" + ) + + recordings.append(recording) + + if len(recordings) > 1: + final_recording = si.concatenate_recordings(recordings) + print(f"Concatenados {len(recordings)} registros.") + elif recordings: + final_recording = recordings[0] + print("Solo un registro disponible para concatenar.") + else: + final_recording = None + print("No se procesaron registros.") + + print("Procesamiento de archivos completado") + return final_recording + + except Exception as e: + print(f"Error crítico en el procesamiento de archivos: {e}") + return None + +def check_concatenation(record_maze, record_sleep): + # Concatenar ambos registros + if record_maze is not None and record_sleep is not None: + final_recording = si.concatenate_recordings([record_maze, record_sleep]) + print("Registros de Maze y Sueño concatenados exitosamente.") + else: + final_recording = record_maze if record_maze is not None else record_sleep + if final_recording is not None: + print("Solo un registro disponible para concatenar.") + else: + print("No hay registros disponibles para concatenar.") + return final_recording + +def process_artifacts(artifacts, base_folder, fs): + # Listar todos los archivos en el folder base + all_files = os.listdir(base_folder) + lan_file = [lan_file for lan_file in all_files if lan_file.startswith('LAN') and lan_file.endswith('500.mat')] + + if not lan_file: + raise FileNotFoundError("No se encontró ningún archivo LAN en el directorio especificado.") + + path_lan = os.path.join(base_folder, lan_file[0]) + print (path_lan) + # Leer el archivo HDF5 + with h5py.File(path_lan, 'r') as LAN: + LAN = LAN['LAN'] + sf_ref = LAN['srate'] + srate = sf_ref[()][0] + selected_ref = LAN[artifacts] + + # Procesar los artefactos + selected = ~selected_ref[:] + selected = selected.astype(int) + selected_0 = np.insert(selected, 0, 0) + dif = np.diff(selected_0) + ind_ini = np.where(dif == 1)[0] + ind_fin = np.where(dif == -1)[0] + + # Ajustar índices para que cada inicio tenga un fin correspondiente + while len(ind_ini) != len(ind_fin): + if ind_ini[-1] > ind_fin[-1]: + ind_ini = ind_ini[:-1] + if ind_fin[0] < ind_ini[0]: + ind_fin = ind_fin[1:] + + # Calcular los triggers + list_ini = np.array(ind_ini / srate) + list_ini = list_ini * fs + list_ini = list_ini.astype(int) + + list_fin = np.array(ind_fin / srate) + list_fin = list_fin * fs + list_fin = list_fin.astype(int) + + list_triggers = [] + + list_triggers = crear_vectores_acumulados(0.1*fs,list_ini,list_fin) + + # Tiempos antes y después del artefacto + ms_before = 0 # zero before trigger + ms_after = 500 # 500 ms after trigger + + return list_triggers, ms_before, ms_after + + +def crear_vectores_acumulados(distancia, inicios, fines): + # Lista para almacenar los valores acumulados de todos los vectores + valores_acumulados = [] + + # Iteramos sobre cada par de inicio y fin + for inicio, fin in zip(inicios, fines): + # Creamos el vector para el par actual + vector = np.arange(inicio, fin, distancia) + # Convertimos cada valor del vector a entero antes de añadirlo + valores_acumulados.extend(vector.astype(int).tolist()) # Convertimos a int y luego extendemos + + # Convertimos la lista final a un array de NumPy de enteros + return np.array(valores_acumulados, dtype=int) + +def espigas(sorter): + sorting = sorter + +# Obtener el número de clusters + unit_ids = sorter.get_unit_ids() + num_clusters = 
len(unit_ids) + +# Obtener el número total de espigas + total_spikes = sum([len(sorting.get_unit_spike_train(unit_id)) for unit_id in unit_ids]) + return num_clusters, total_spikes + +def sorting_analyzer(sorting, recording, output_folder): + try: + sorting_analyzer = si.create_sorting_analyzer(sorting=sorting, + recording=recording, + sparse=True, + format="binary_folder", + folder=output_folder) + + # Compute necessary metrics + sorting_analyzer.compute(['random_spikes', 'waveforms', 'templates', 'noise_levels']) + sorting_analyzer.compute(['principal_components', 'spike_amplitudes', 'template_similarity', 'unit_locations', 'correlograms'],) + + # Compute quality metrics + sorting_analyzer.compute('quality_metrics', metric_names=["snr", "firing_rate"], skip_pc_metrics=True) + + return sorting_analyzer + + except Exception as e: + print(f"An error occurred: {e}") + return None + +def create_folders(base_name, group=False): + prefix = 'group_' if group else '' + sorter_folder = Path(f'output/AN/kilosort/{prefix}sorter_{base_name}') + analyzer_folder = Path(f'output/AN/kilosort/{prefix}analyzer_{base_name}') + phy_output_folder = Path(f'output/AN/kilosort/{prefix}phy_{base_name}') + + return sorter_folder, analyzer_folder, phy_output_folder \ No newline at end of file diff --git a/spikes_corto.ipynb b/.ipynb_checkpoints/sorterpipeline-checkpoint.ipynb similarity index 78% rename from spikes_corto.ipynb rename to .ipynb_checkpoints/sorterpipeline-checkpoint.ipynb index d47e030..4417a57 100644 --- a/spikes_corto.ipynb +++ b/.ipynb_checkpoints/sorterpipeline-checkpoint.ipynb @@ -2,20 +2,12 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "1a16521b-14b7-4381-9cf3-41532d427539", "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "12:07:24 [I] klustakwik KlustaKwik2 version 0.2.6\n" - ] - } - ], + "outputs": [], "source": [ "# Dependencias.\n", "import scipy.io\n", @@ -32,7 +24,7 @@ "import spikeinterface.sorters as ss\n", "import spikeinterface.exporters as exp\n", "import seaborn as sns # <- Para graficos estadisticos\n", - "from preprocessing_functions import read_rhd, get_recording, check_concatenation, process_artifacts\n", + "from preprocessing_functions import read_rhd, get_recording, check_concatenation, process_artifacts, espigas, sorting_analyzer, create_folders\n", "\n", "from pathlib import Path\n", "from spikeinterface.extractors import read_intan\n", @@ -42,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "0d3a9305-152a-4a31-9f23-0fcc756c85ef", "metadata": {}, "outputs": [], @@ -57,64 +49,6 @@ "si.set_global_job_kwargs(**job_kwargs)" ] }, - { - "cell_type": "markdown", - "id": "b3fa7ed1-75c0-4647-a66a-052844ee176b", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, - "source": [ - "### Auxiliares" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a6fa5110-3fa3-41a3-b9f5-48bafd83df7b", - "metadata": {}, - "outputs": [], - "source": [ - "def espigas(sorter):\n", - " sorting = sorter\n", - "\n", - "# Obtener el número de clusters\n", - " unit_ids = sorter.get_unit_ids()\n", - " num_clusters = len(unit_ids)\n", - "\n", - "# Obtener el número total de espigas\n", - " total_spikes = sum([len(sorting.get_unit_spike_train(unit_id)) for unit_id in unit_ids])\n", - " return num_clusters, total_spikes\n", - "\n", - "def sorting_analyzer(sorting, recording, output_folder):\n", - " try:\n", - " sorting_analyzer = 
si.create_sorting_analyzer(sorting=sorting, \n", - " recording=recording, \n", - " sparse=True,\n", - " format=\"binary_folder\",\n", - " folder=output_folder)\n", - "\n", - " # Compute necessary metrics\n", - " sorting_analyzer.compute(['random_spikes', 'waveforms', 'templates', 'noise_levels'])\n", - " sorting_analyzer.compute(['principal_components', 'spike_amplitudes', 'template_similarity', 'unit_locations', 'correlograms'],)\n", - " \n", - " # Compute quality metrics\n", - " sorting_analyzer.compute('quality_metrics', metric_names=[\"snr\", \"firing_rate\"], skip_pc_metrics=True)\n", - " \n", - " return sorting_analyzer\n", - " \n", - " except Exception as e:\n", - " print(f\"An error occurred: {e}\")\n", - " return None\n", - "\n", - "def create_folders(base_name, group=False):\n", - " prefix = 'group_' if group else ''\n", - " sorter_folder = Path(f'output/AN/kilosort/{prefix}sorter_{base_name}')\n", - " analyzer_folder = Path(f'output/AN/kilosort/{prefix}analyzer_{base_name}')\n", - " phy_output_folder = Path(f'output/AN/kilosort/{prefix}phy_{base_name}')\n", - " \n", - " return sorter_folder, analyzer_folder, phy_output_folder" - ] - }, { "cell_type": "markdown", "id": "f4140b07-598a-453e-b6a3-b21265267b0d", @@ -157,7 +91,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "32e68d41-2dfe-49a7-a3a3-243d33ce3d90", "metadata": {}, "outputs": [], @@ -170,34 +104,16 @@ "\n", "# Archivos de Excel para la información de registros\n", "# como inicio se recomienda solo poner un dia en el excel de información de archivos, este es el punto de inicio para hacer una maquina de salchichas para todos los días del animal\n", - "excel_file_maze = \"input_files/PF07/Maze/informacion_archivos.xlsx\"\n", - "excel_file_sleep = \"input_files/PF07/Sleep/informacion_archivos.xlsx\"" + "excel_file_maze = r'C:\\Users\\Labcn\\OneDrive - Universidad Católica de Chile\\sorting_anillo\\input_files\\PF07\\Maze\\informacion_archivos.xlsx'\n", + "excel_file_sleep = r'C:\\Users\\Labcn\\OneDrive - Universidad Católica de Chile\\sorting_anillo\\input_files\\PF07\\Sleep\\informacion_archivos.xlsx'" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "92ff5fcb-ac0f-4eed-b5e7-874c50813815", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Leyendo [WindowsPath('C:/Users/Labcn/OneDrive - Universidad Católica de Chile/sorting_anillo/input_files/PF07/Maze/PF07_maze_Day09_240506_101500.rhd')]\n", - "Procesado 1 archivo .rhd.\n", - "C:\\Users\\Labcn\\OneDrive - Universidad Católica de Chile\\sorting_anillo\\input_files\\PF07\\Maze\\LAN_PF07_maze_Day09_240506_101500_500.mat\n", - "Solo un registro disponible para concatenar.\n", - "Procesamiento de archivos completado\n", - "Leyendo [WindowsPath('C:/Users/Labcn/OneDrive - Universidad Católica de Chile/sorting_anillo/input_files/PF07/Sleep/PF07_Sleep_Day09_240506_114146.rhd')]\n", - "Procesado 1 archivo .rhd.\n", - "C:\\Users\\Labcn\\OneDrive - Universidad Católica de Chile\\sorting_anillo\\input_files\\PF07\\Sleep\\LAN_PF07_Sleep_Day09_240506_114146_500.mat\n", - "Solo un registro disponible para concatenar.\n", - "Procesamiento de archivos completado\n", - "Registros de Maze y Sueño concatenados exitosamente.\n" - ] - } - ], + "outputs": [], "source": [ "# Procesar archivos\n", "record_maze = get_recording(excel_file_maze, probegroup_file)\n", @@ -225,76 +141,12 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": 
"3fe4dda9-ffe2-4489-a77c-bc39b72c87df", "metadata": { "scrolled": true }, - "outputs": [ - { - "data": { - "text/html": [ - "<div style='border:1px solid #ddd; padding:10px;'><strong>ConcatenateSegmentRecording: 16 channels - 20.0kHz - 1 segments - 152,651,648 samples - 7,632.58s (2.12 hours) - int16 dtype - 4.55 GiB</strong></div><details style='margin-left: 10px;'> <summary><strong>Channel IDs</strong></summary><ul>['A-008' 'A-009' 'A-010' 'A-011' 'A-012' 'A-013' 'A-014' 'A-015' 'A-016'\n", - " 'A-017' 'A-018' 'A-019' 'A-020' 'A-021' 'A-022' 'A-023'] </details><details style='margin-left: 10px;'> <summary><strong>Annotations</strong></summary><ul><li> <strong> is_filtered </strong>: True</li><li> <strong> name </strong>: None</li><li> <strong> probe_0_planar_contour </strong>: [[ -20. 80.]\n", - " [ -20. -20.]\n", - " [ 0. -100.]\n", - " [ 20. -20.]\n", - " [ 20. 80.]]</li><li> <strong> probe_1_planar_contour </strong>: [[ 980. 80.]\n", - " [ 980. -20.]\n", - " [1000. -100.]\n", - " [1020. -20.]\n", - " [1020. 80.]]</li><li> <strong> probe_2_planar_contour </strong>: [[1980. 80.]\n", - " [1980. -20.]\n", - " [2000. -100.]\n", - " [2020. -20.]\n", - " [2020. 80.]]</li><li> <strong> probe_3_planar_contour </strong>: [[2980. 80.]\n", - " [2980. -20.]\n", - " [3000. -100.]\n", - " [3020. -20.]\n", - " [3020. 80.]]</li></ul> </details><details style='margin-left: 10px;'><summary><strong>Channel Properties</strong></summary><ul><details><summary> <strong> gain_to_uV </strong> </summary>[0.195 0.195 0.195 0.195 0.195 0.195 0.195 0.195 0.195 0.195 0.195 0.195\n", - " 0.195 0.195 0.195 0.195]</details><details><summary> <strong> offset_to_uV </strong> </summary>[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]</details><details><summary> <strong> channel_names </strong> </summary>['A-008' 'A-009' 'A-010' 'A-011' 'A-012' 'A-013' 'A-014' 'A-015' 'A-016'\n", - " 'A-017' 'A-018' 'A-019' 'A-020' 'A-021' 'A-022' 'A-023']</details><details><summary> <strong> contact_vector </strong> </summary>[(0, 0., 0., 'circle', 5., '', '', 0, 'um', 1., 0., 0., 1.)\n", - " (0, 0., 20., 'circle', 5., '', '', 1, 'um', 1., 0., 0., 1.)\n", - " (0, 0., 40., 'circle', 5., '', '', 2, 'um', 1., 0., 0., 1.)\n", - " (0, 0., 60., 'circle', 5., '', '', 3, 'um', 1., 0., 0., 1.)\n", - " (1, 1000., 0., 'circle', 5., '', '', 4, 'um', 1., 0., 0., 1.)\n", - " (1, 1000., 20., 'circle', 5., '', '', 5, 'um', 1., 0., 0., 1.)\n", - " (1, 1000., 40., 'circle', 5., '', '', 6, 'um', 1., 0., 0., 1.)\n", - " (1, 1000., 60., 'circle', 5., '', '', 7, 'um', 1., 0., 0., 1.)\n", - " (2, 2000., 0., 'circle', 5., '', '', 8, 'um', 1., 0., 0., 1.)\n", - " (2, 2000., 20., 'circle', 5., '', '', 9, 'um', 1., 0., 0., 1.)\n", - " (2, 2000., 40., 'circle', 5., '', '', 10, 'um', 1., 0., 0., 1.)\n", - " (2, 2000., 60., 'circle', 5., '', '', 11, 'um', 1., 0., 0., 1.)\n", - " (3, 3000., 0., 'circle', 5., '', '', 12, 'um', 1., 0., 0., 1.)\n", - " (3, 3000., 20., 'circle', 5., '', '', 13, 'um', 1., 0., 0., 1.)\n", - " (3, 3000., 40., 'circle', 5., '', '', 14, 'um', 1., 0., 0., 1.)\n", - " (3, 3000., 60., 'circle', 5., '', '', 15, 'um', 1., 0., 0., 1.)]</details><details><summary> <strong> location </strong> </summary>[[ 0. 0.]\n", - " [ 0. 20.]\n", - " [ 0. 40.]\n", - " [ 0. 60.]\n", - " [1000. 0.]\n", - " [1000. 20.]\n", - " [1000. 40.]\n", - " [1000. 60.]\n", - " [2000. 0.]\n", - " [2000. 20.]\n", - " [2000. 40.]\n", - " [2000. 60.]\n", - " [3000. 0.]\n", - " [3000. 20.]\n", - " [3000. 40.]\n", - " [3000. 
60.]]</details><details><summary> <strong> group </strong> </summary>[0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3]</details></ul></details>" - ], - "text/plain": [ - "ConcatenateSegmentRecording: 16 channels - 20.0kHz - 1 segments - 152,651,648 samples \n", - " 7,632.58s (2.12 hours) - int16 dtype - 4.55 GiB" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# plot and check spikes\n", "mode = \"line\"\n", diff --git a/__pycache__/preprocessing_functions.cpython-312.pyc b/__pycache__/preprocessing_functions.cpython-312.pyc new file mode 100644 index 0000000..500d96a Binary files /dev/null and b/__pycache__/preprocessing_functions.cpython-312.pyc differ diff --git a/probes/Probegenerator_v3.ipynb b/probes/Probegenerator_v3.ipynb index 0122db3..c16ae66 100644 --- a/probes/Probegenerator_v3.ipynb +++ b/probes/Probegenerator_v3.ipynb @@ -360,7 +360,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.12.4" } }, "nbformat": 4, diff --git a/sorterpipeline.ipynb b/sorterpipeline.ipynb new file mode 100644 index 0000000..4417a57 --- /dev/null +++ b/sorterpipeline.ipynb @@ -0,0 +1,879 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "1a16521b-14b7-4381-9cf3-41532d427539", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Dependencias.\n", + "import scipy.io\n", + "\n", + "%matplotlib widget\n", + "import h5py\n", + "import os\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import probeinterface as pi\n", + "import spikeinterface as si\n", + "import spikeinterface.widgets as sw\n", + "import spikeinterface.sorters as ss\n", + "import spikeinterface.exporters as exp\n", + "import seaborn as sns # <- Para graficos estadisticos\n", + "from preprocessing_functions import read_rhd, get_recording, check_concatenation, process_artifacts, espigas, sorting_analyzer, create_folders\n", + "\n", + "from pathlib import Path\n", + "from spikeinterface.extractors import read_intan\n", + "import spikeinterface.preprocessing as prep\n", + "from probeinterface import Probe, ProbeGroup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d3a9305-152a-4a31-9f23-0fcc756c85ef", + "metadata": {}, + "outputs": [], + "source": [ + "# global kwargs for parallel computing\n", + "job_kwargs = dict(\n", + " n_jobs=-1,\n", + " chunk_duration='1s',\n", + " progress_bar=True,\n", + ")\n", + "\n", + "si.set_global_job_kwargs(**job_kwargs)" + ] + }, + { + "cell_type": "markdown", + "id": "f4140b07-598a-453e-b6a3-b21265267b0d", + "metadata": {}, + "source": [ + "## Lectura y Preprocesado.\n", + "el procesado de datos se realiza de la siguiente forma: \n", + "```mermaid\n", + "flowchart TD\n", + " A[\"Datos crudos (Raw)\"] --> B[\"Filtro pasa banda (500 - 9000Hz)\"]\n", + " B --> C[\"Incorporacion de electrodos y definición de grupos\"]\n", + " C --> D[\"Definición de canales malos\"]\n", + " D --> E[\"Remoción de artefactos\"]\n", + " E --> F[\"`Guardado en formato binario (carpeta **preprocess**)`\"]\n", + " \n", + "```\n", + "Algunas opciones comunes de group_mode en SpikeInterface incluyen:\n", + "\n", + "by_probe: Agrupa los canales o unidades según el probe (sonda) al que pertenecen. Esto es útil si se están utilizando varias sondas en el experimento.\n", + "\n", + "by_shank: Agrupa los canales según el shank (tallo) dentro de un probe. 
Esto se usa cuando un probe tiene múltiples shanks.\n", + "\n", + "by_electrode_group: Agrupa los canales por el grupo de electrodos. Esto permite analizar o procesar datos por grupos predefinidos de electrodos.\n", + "\n", + "by_channel: Trata cada canal por separado, sin agruparlos. Es útil cuando se quiere analizar cada canal de manera independiente.\n", + "\n", + "by_unit: Agrupa los datos según unidades individuales de spikes. Esto es útil cuando se quiere analizar características a nivel de unidades de spikes individuales en lugar de a nivel de canales.\n", + "\n", + "all: Agrupa todos los canales o unidades en un solo grupo, tratándolos como un conjunto único. Esto puede ser útil para obtener un análisis global del conjunto de datos." + ] + }, + { + "cell_type": "markdown", + "id": "79e7d9ac-459c-4825-a684-8f24302c774f", + "metadata": {}, + "source": [ + "### Defición de parametros.\n", + "para el ejemplo, se almacenan todos los pasos intermedios, estos " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32e68d41-2dfe-49a7-a3a3-243d33ce3d90", + "metadata": {}, + "outputs": [], + "source": [ + "# Configuración de archivos\n", + "probegroup_file = 'probes/anillo_probe.json' # Archivo de configuración del probegroup\n", + "\n", + "# Configuración de carpetas de procesamiento\n", + "preprocess_folder = Path('preprocess/')\n", + "\n", + "# Archivos de Excel para la información de registros\n", + "# como inicio se recomienda solo poner un dia en el excel de información de archivos, este es el punto de inicio para hacer una maquina de salchichas para todos los días del animal\n", + "excel_file_maze = r'C:\\Users\\Labcn\\OneDrive - Universidad Católica de Chile\\sorting_anillo\\input_files\\PF07\\Maze\\informacion_archivos.xlsx'\n", + "excel_file_sleep = r'C:\\Users\\Labcn\\OneDrive - Universidad Católica de Chile\\sorting_anillo\\input_files\\PF07\\Sleep\\informacion_archivos.xlsx'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92ff5fcb-ac0f-4eed-b5e7-874c50813815", + "metadata": {}, + "outputs": [], + "source": [ + "# Procesar archivos\n", + "record_maze = get_recording(excel_file_maze, probegroup_file)\n", + "record_sleep = get_recording(excel_file_sleep, probegroup_file)\n", + "recording = check_concatenation(record_maze, record_sleep)\n", + "\n", + "# Crear carpeta para picos para sorting manual si es necesario\n", + "peak_folder = Path('output/manual/peaks')\n", + "# peak_folder.mkdir(exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "id": "ea9b4c41-9562-4cd9-af7e-e714c0f37931", + "metadata": {}, + "source": [ + "### Inspeccion de los resultados.\n", + "Revise:\n", + "Se haya realizado el procedimiento de concatenado.\n", + "Se removieron los artefactos.\n", + "Nombres de los canales.\n", + "\n", + "importante: Con el fin de mejorar la visualización, los datos no estan escalados. 
Para presentarlos escalados (uV) defina return_scaled=True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3fe4dda9-ffe2-4489-a77c-bc39b72c87df", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# plot and check spikes\n", + "mode = \"line\"\n", + "w = sw.plot_traces(recording=recording, \n", + " mode=mode, time_range=(0, 105), color_groups=True, return_scaled=False,\n", + " show_channel_ids=True, order_channel_by_depth=False, backend=\"ephyviewer\")\n", + "\n", + "recording" + ] + }, + { + "cell_type": "markdown", + "id": "9bb98aae-9c2f-4826-b140-93c451c02e06", + "metadata": {}, + "source": [ + "### Eliminacion de canales.\n", + "indique los canales a eliminar, para ello utilice el nombre (\"Channel ID\") que se le da en Intan. Observe el ejemplo.\n", + "Confirme que se haya realizado la eliminación ejecutando la celda anterior." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8502da2c-dd2d-4e7a-9574-9d1f1f18f1bc", + "metadata": {}, + "outputs": [], + "source": [ + "bad_channels =[\"A-015\", \"A-016\"]\n", + "recording=recording.remove_channels(bad_channels)\n" + ] + }, + { + "cell_type": "markdown", + "id": "4f5a2412-8fde-4881-be0e-175d12bda873", + "metadata": {}, + "source": [ + "### Guardar archivos preprocesados\n", + "Guardar archivos preprocesados como binarios." + ] + }, + { + "cell_type": "markdown", + "id": "84dc9569-c570-4dcb-b715-67e97712f065", + "metadata": {}, + "source": [ + "#### Guardar el registro completo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75e2ede9-d6ef-4477-9553-cba613fd6fa0", + "metadata": {}, + "outputs": [], + "source": [ + "recording.save(folder=preprocess_folder, overwrite=True, **job_kwargs)\n", + " # rec_artifacts.save(folder=preprocess_artifacts, **job_kwargs) # ejercicio para guardar registro sin la eliminaciòn de artefactos." + ] + }, + { + "cell_type": "markdown", + "id": "cf93af84-d1e6-4a13-aed0-59f59f771406", + "metadata": {}, + "source": [ + "#### Guardar un segmento del registro" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a338f139-2612-4936-ae1c-2702fd8031b1", + "metadata": {}, + "outputs": [], + "source": [ + "sliced_rec=recording.time_slice(start_time=0, end_time=2000)\n", + "sliced_rec.save(format='binary', folder=Path('preprocess/slic3d'), overwrite=True, **job_kwargs)" + ] + }, + { + "cell_type": "markdown", + "id": "42fc8a15-6918-4408-9e5f-0386978bf96b", + "metadata": {}, + "source": [ + "#### Leer el registro guardado" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b962549a-ae2b-4900-9b3e-202a30b952af", + "metadata": {}, + "outputs": [], + "source": [ + "recording = si.load_extractor(preprocess_folder)" + ] + }, + { + "cell_type": "markdown", + "id": "03924393-f90e-479d-a3f5-f2f51cb305f3", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "## Estimar ruido\n", + "\n", + "Estimate noise for each channel using MAD methods (median absolute deviation). You can use standard deviation with method=”std”\n", + "Internally it samples some chunk across segment. 
And then, it use MAD estimator (more robust than STD)\n", + "\n", + "detalle para el calculo de chunks\n", + "```python\n", + " random_chunks = get_random_data_chunks(recording, return_scaled=return_scaled, **random_chunk_kwargs)\n", + "```\n", + " \n", + "detalle del codigo para calcular el ruido:\n", + "```python\n", + " if method == \"mad\":\n", + " med = np.median(random_chunks, axis=0, keepdims=True)\n", + " # hard-coded so that core doesn't depend on scipy\n", + " noise_levels = np.median(np.abs(random_chunks - med), axis=0) / 0.6744897501960817\n", + "```\n", + " \n", + "ref, API: https://github.com/SpikeInterface/spikeinterface/blob/a9b4c34200be91a589ad145c976c5a177ce6746e/src/spikeinterface/core/recording_tools.py#L82C5-L100\n", + "\n", + "interesante discusión en:\n", + "https://github.com/SpikeInterface/spikeinterface/issues/1681" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97e36ce4-3f4d-49ce-8408-6a4f6e8e8008", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Calcular los niveles de ruido\n", + "noise_levels = si.get_noise_levels(recording, return_scaled=True, method=\"mad\")\n", + "\n", + "# Crear una figura más grande\n", + "fig, ax1 = plt.subplots(figsize=(10, 6))\n", + "\n", + "# Histograma en el primer eje (ax1)\n", + "color_hist = 'skyblue'\n", + "ax1.hist(noise_levels, bins='auto', color=color_hist, alpha=0.7, edgecolor='black')\n", + "ax1.set_xlabel('Noise Level (MAD)', fontsize=14)\n", + "ax1.set_ylabel('Frequency', fontsize=14, color=color_hist)\n", + "ax1.tick_params(axis='y', labelcolor=color_hist)\n", + "\n", + "# Calcular y agregar línea vertical para la media en el primer eje\n", + "mean_noise = np.mean(noise_levels)\n", + "ax1.axvline(mean_noise, color='blue', linestyle='dashed', linewidth=2, label=f'Mean: {mean_noise:.2f}')\n", + "\n", + "# Calcular y agregar línea para la desviación estándar en el primer eje\n", + "std_noise = np.std(noise_levels)\n", + "ax1.axvline(mean_noise + std_noise, color='green', linestyle='dashed', linewidth=1, label=f'Std: {std_noise:.2f}')\n", + "ax1.axvline(mean_noise - std_noise, color='green', linestyle='dashed', linewidth=1)\n", + "\n", + "# Agregar leyenda al primer eje\n", + "ax1.legend()\n", + "\n", + "# Crear el segundo eje para la densidad (ax2) que comparte el mismo eje x\n", + "ax2 = ax1.twinx()\n", + "color_density = 'red'\n", + "\n", + "# Agregar línea de densidad con seaborn en el segundo eje\n", + "sns.kdeplot(noise_levels, color=color_density, linewidth=2, ax=ax2)\n", + "ax2.set_ylabel('Density', fontsize=14, color=color_density)\n", + "ax2.tick_params(axis='y', labelcolor=color_density)\n", + "\n", + "# Agregar título principal\n", + "plt.title('Noise Levels Across Channels', fontsize=16, fontweight='bold')\n", + "\n", + "# Mejorar los ticks del eje x\n", + "ax1.tick_params(axis='x', labelsize=12)\n", + "ax2.tick_params(axis='y', labelsize=12)\n", + "\n", + "# Agregar gridlines para mayor claridad\n", + "ax1.grid(True, linestyle='--', alpha=0.6)\n", + "\n", + "# Ajustar el layout para evitar que se superpongan elementos\n", + "plt.tight_layout()\n", + "\n", + "# Mostrar la gráfica\n", + "plt.show()\n", + "noise_levels" + ] + }, + { + "cell_type": "markdown", + "id": "07b188a3-c16f-4a62-b126-d6b77bfbdbbf", + "metadata": {}, + "source": [ + "## Detectar peaks\n", + "La detección de peaks suele ser el primer paso en la clasificación de señales y consiste en encontrar peaks en los registros que podrían ser spikes reales.”\n", + "\n", + "Different methods are available with the 
method argument:\n", + "\n", + "‘by_channel’ (default): peaks are detected separately for each channel\n", + "‘locally_exclusive’ (requires numba): peaks on neighboring channels within a certain radius are excluded (not counted multiple times)\n", + "‘by_channel_torch’ (requires torch): pytorch implementation (GPU-compatible) that uses max pooling for time deduplication\n", + "‘locally_exclusive_torch’ (requires torch): pytorch implementation (GPU-compatible) that uses max pooling for space-time deduplication\n", + "\n", + "**si quiere cambiar el método, debes eliminar el archivo peaks.npy antes de calcular**\n", + "\n", + "[lista de parametros API](https://spikeinterface.readthedocs.io/en/latest/api.html#spikeinterface.sortingcomponents.peak_detection.detect_peaks) \n", + "referencias sobre el paso de [deteccion de espigas](https://spikeinterface.readthedocs.io/en/latest/modules/sortingcomponents.html#peak-detection)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01a54a48-ccb6-4aca-bbcc-b1b1bd5cabe6", + "metadata": {}, + "outputs": [], + "source": [ + "peak_folder = Path('output/manual/peaks')\n", + "peak_folder.mkdir(parents=True, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8167e03-9f1e-44c9-a2ae-1cb33152cac0", + "metadata": {}, + "outputs": [], + "source": [ + "from spikeinterface.sortingcomponents.peak_detection import detect_peaks\n", + "if not (peak_folder / 'peaks.npy').exists():\n", + " peaks = detect_peaks(\n", + " recording=recording,\n", + " method='locally_exclusive',\n", + " peak_sign='neg',\n", + " detect_threshold=5, # Threshold, in median absolute deviations (MAD), to use to detect peaks\n", + " noise_levels=noise_levels,\n", + " **job_kwargs,\n", + " )\n", + " np.save(peak_folder / 'peaks.npy', peaks)\n", + "peaks = np.load(peak_folder / 'peaks.npy')\n", + "print(\"cantidad de eventos:\", peaks.shape)\n", + "print(\"Tipo de peaks:\", type(peaks))\n", + "print(\"Contenido de peaks:\", peaks)\n", + "print(\"Tipo de datos en peaks:\", peaks.dtype)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b062afc8-bd5e-40af-9708-b60b70fe4149", + "metadata": {}, + "outputs": [], + "source": [ + "from spikeinterface.sortingcomponents.peak_localization import localize_peaks\n", + "\n", + "# método centro de masas con largo 0.9s, 0.3 antes y 0.6 despues\n", + "\n", + "if not (peak_folder / 'peak_locations_center_of_mass.npy').exists():\n", + " peak_locations = localize_peaks(\n", + " recording=rec_fil,\n", + " peaks=peaks,\n", + " ms_before=0.3,\n", + " ms_after=0.6,\n", + " method='center_of_mass',\n", + " **job_kwargs,\n", + " )\n", + " np.save(peak_folder / 'peak_locations_center_of_mass.npy', peak_locations)\n", + " print(peak_locations.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1417dac5-b908-4e31-9dcd-7f7c943f08b4", + "metadata": {}, + "outputs": [], + "source": [ + "# método monopolar_triangulation\n", + "\n", + "if not (peak_folder / 'peak_locations_monopolar_triangulation_legacy.npy').exists():\n", + " peak_locations = localize_peaks(\n", + " recording=rec_fil,\n", + " peaks=peaks,\n", + " ms_before=0.3,\n", + " ms_after=0.6,\n", + " method='monopolar_triangulation',\n", + " **job_kwargs,\n", + " )\n", + " np.save(peak_folder / 'peak_locations_monopolar_triangulation_legacy.npy', peak_locations)\n", + " print(peak_locations.shape)" + ] + }, + { + "cell_type": "markdown", + "id": "c99687fb-cd6a-497c-8cf7-deef7970e281", + "metadata": {}, + "source": [ + 
"## Ejecutar un sorter\n", + "ejecutar Kilosort\n", + "\n", + "### revisar parametros de entrada (configuracion)\n", + "parametros especificos para kilosort (pasables por kwargs): \n", + "https://kilosort.readthedocs.io/en/latest/parameters.html \n", + "https://github.com/MouseLand/Kilosort/blob/main/kilosort/parameters.py" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00e7c619-a581-47a0-a82e-b4e13ffb0333", + "metadata": {}, + "outputs": [], + "source": [ + "# limpiar la memoria de torch antes de procesar el sorter.\n", + "import torch\n", + "torch.cuda.empty_cache()\n", + "\n", + "ss.get_default_sorter_params('kilosort4')" + ] + }, + { + "cell_type": "markdown", + "id": "a20fcc41-2af7-47e2-a96b-6639d6d307e4", + "metadata": {}, + "source": [ + "## Kilosort 4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c510f7bd-13e2-46e7-8d12-652fceb5eb8c", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "base_name = 'Rev9'\n", + "\n", + "# Single sorting\n", + "sorter_folder, analyzer_folder, phy_output_folder = create_folders(base_name)\n", + "\n", + "# Group sorting\n", + "group_sorter_folder, group_analyzer_folder, group_phy_output_folder = create_folders(base_name, group=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "968ba2aa-d35c-4f67-9798-5e4f07c85f43", + "metadata": {}, + "outputs": [], + "source": [ + "params_kilosort4 = {## MAIN_PARAMETERS \n", + " 'batch_size': 60000,\n", + " 'nblocks': 0,\n", + " 'Th_universal': 7,\n", + " 'Th_learned': 6,\n", + " ## Preprocessing\n", + " 'artifact_threshold': 1000,\n", + " ## SPIKE DETECTION\n", + " 'min_template_size': 10,\n", + " 'template_sizes':5,\n", + " 'n_pcs':6,\n", + " 'templates_from_data': True,\n", + " 'nearest_chans': 4, \n", + " 'nearest_templates': 15,\n", + " 'max_channel_distance': 60,\n", + " 'Th_single_ch': 4,\n", + " ## Clustering\n", + " #'acg_threshold':0.15,\n", + " #'cluster_downsampling':10,\n", + " ## extras\n", + " #'binning_depth':4,\n", + " #'drift_smoothing':[0.3, 0.3, 0.3],\n", + " 'skip_kilosort_preprocessing': False,} # se crea un diccionario donde se pueden pasar las variables modificadas al sorter." 
+ ] + }, + { + "cell_type": "markdown", + "id": "40df9d7e-f433-4a3b-a33e-08abfbc2acf2", + "metadata": {}, + "source": [ + "### Sorting en bulto" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a22ef0bf-4e5f-49ab-96b7-9b442dfb8e59", + "metadata": {}, + "outputs": [], + "source": [ + "sorter = ss.run_sorter(\n", + " sorter_name='kilosort4',\n", + " recording = recording,\n", + " verbose=True,\n", + " folder = sorter_folder,\n", + " remove_existing_folder=True, ## CUIDADO, SOBREESCRIBE LOS DATOS EN CASO DE HABER UNA CARPETA, PARA DESHABILITAR PONER =FALSE\n", + " **params_kilosort4)\n", + "\n", + "num_clusters, total_spikes= espigas(sorter)\n", + "print(f\"Número total de clusters: {num_clusters}\")\n", + "print(f\"Número total de espigas: {total_spikes}\")\n", + "\n", + "analyzer=sorting_analyzer(sorter, recording, output_folder=analyzer_folder)\n", + "\n", + "exp.export_to_phy(sorting_analyzer=analyzer, \n", + " remove_if_exists=True, \n", + " copy_binary=True, \n", + " output_folder=phy_output_folder)" + ] + }, + { + "cell_type": "markdown", + "id": "d9f26e72-5f02-405b-bc6a-e6c956704894", + "metadata": {}, + "source": [ + "### Sorting por grupo : Automatic splitting" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a22a06f-c7b2-4341-888a-f155a0cb5119", + "metadata": {}, + "outputs": [], + "source": [ + "group_sorter = ss.run_sorter_by_property(\n", + " sorter_name='kilosort4',\n", + " recording = recording,\n", + " grouping_property='group',\n", + " verbose=True,\n", + " folder = group_sorter_folder,\n", + " remove_existing_folder=True, ## CUIDADO, SOBREESCRIBE LOS DATOS EN CASO DE HABER UNA CARPETA, PARA DESHABILITAR PONER =FALSE\n", + " **params_kilosort4)\n", + "\n", + "num_clusters, total_spikes= espigas(group_sorter)\n", + "print(f\"Número total de clusters: {num_clusters}\")\n", + "print(f\"Número total de espigas: {total_spikes}\")\n", + "\n", + "group_analyzer=sorting_analyzer(group_sorter, recording, output_folder=group_analyzer_folder)\n", + "\n", + "exp.export_to_phy(sorting_analyzer=group_analyzer, \n", + " remove_if_exists=True, \n", + " copy_binary=True, \n", + " output_folder=group_phy_output_folder) " + ] + }, + { + "cell_type": "markdown", + "id": "4258b7de-89dd-4bee-baee-e69824680d32", + "metadata": {}, + "source": [ + "### Sorting por grupo : Manual splitting (en desarrollo)" + ] + }, + { + "cell_type": "markdown", + "id": "770a2908-a11d-43d5-8652-6bdcf8eeac78", + "metadata": {}, + "source": [ + "El enfoque implementado tiene como objetivo mejorar el rendimiento del spike sorting al segmentar el proceso por grupos de canales o regiones de la grabación. Al realizar el spike sorting por partes, se reduce significativamente la demanda de memoria y el uso de GPU en cada ejecución. Esto es especialmente útil cuando se trabaja con grabaciones de gran tamaño o cuando la capacidad de la GPU es limitada.\n", + "\n", + "Dividiendo el registro en grupos, el sorter puede operar en fragmentos más pequeños, lo que permite manejar mejor los recursos del sistema y evitar problemas como errores de \"out of memory\" (falta de memoria). Después de realizar el spike sorting por grupos, los resultados se combinan, permitiendo un análisis y exportación global sin sacrificar la eficiencia durante el proceso de clasificación inicial.\n", + "\n", + "Este enfoque balancea el uso de recursos, optimizando el uso de la GPU sin comprometer la calidad del análisis final." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e76555d-dc7a-43f7-af64-cd1be41e235a", + "metadata": {}, + "outputs": [], + "source": [ + "# Dividir la grabación por grupos\n", + "split_recording = recording.split_by(\"group\")\n", + "\n", + "# Diccionario para almacenar los resultados de sorting por grupo\n", + "sortings = {}\n", + "\n", + "# Ejecutar el sorter en cada grupo\n", + "for group, sub_recording in split_recording.items():\n", + " sorting = run_sorter(\n", + " sorter_name='kilosort4',\n", + " recording=sub_recording, # Usar la subgrabación del grupo\n", + " output_folder=f\"fKS4_group{group}\"\n", + " )\n", + " sortings[group] = sorting # Almacenar los resultados del sorter para este grupo\n", + "\n", + "# Combinar los resultados de sorting de todos los grupos\n", + "combined_sorting = si.concatenate_sortings(*sortings.values())\n", + "\n", + "# Información de clusters y espigas para cada grupo\n", + "num_clusters, total_spikes = espigas(combined_sorting)\n", + "print(f\" Número total de clusters: {num_clusters}\")\n", + "print(f\" Número total de espigas: {total_spikes}\")\n", + "\n", + "# Realizar análisis global con el sorting combinado\n", + "combined_analyzer = sorting_analyzer(combined_sorting, recording, output_folder='combined_analyzer_folder')\n", + "\n", + "# Exportar los resultados globales a Phy\n", + "exp.export_to_phy(sorting=combined_analyzer, # Sorting combinado\n", + " recording=recording, # Grabación completa\n", + " remove_if_exists=True,\n", + " copy_binary=True,\n", + " output_folder='combined_phy_output_folder')\n" + ] + }, + { + "cell_type": "markdown", + "id": "9a6c810e-a8dc-4cca-96de-abe8b43ea875", + "metadata": {}, + "source": [ + "## inspeccion en grafico" + ] + }, + { + "cell_type": "markdown", + "id": "2da393de-3376-4b3a-89f3-c35f19adb28b", + "metadata": {}, + "source": [ + "### leer un analisis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81cbc069-2452-4ab6-8b24-6dfda650256b", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "folder = 'output/AN/kilosort/analyzer_Rev9'\n", + "analyzer = si.load_sorting_analyzer(folder)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b6facf1-61dd-442e-8b14-06b05ac87b94", + "metadata": {}, + "outputs": [], + "source": [ + "exp.export_to_phy(sorting_analyzer=analyzer, \n", + " remove_if_exists=True, \n", + " copy_binary=True, \n", + " output_folder=Path('output/AN/kilosort/phy_rev9v2'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d6d0056-9fc6-4496-b6e1-c98c5bc9ce95", + "metadata": {}, + "outputs": [], + "source": [ + "import spikeinterface.widgets as sw\n", + "\n", + "sw.plot_spikes_on_traces(sorting_analyzer= analyzer, \n", + " segment_index=None, \n", + " channel_ids=None, \n", + " unit_ids=None, \n", + " order_channel_by_depth=False, \n", + " time_range=None, \n", + " unit_colors=None, \n", + " sparsity=None, \n", + " mode='auto', \n", + " return_scaled=False, \n", + " cmap='RdBu', \n", + " show_channel_ids=False, \n", + " color_groups=False, \n", + " color=None, \n", + " clim=None, \n", + " tile_size=512, \n", + " seconds_per_row=0.2, \n", + " scale=1, \n", + " spike_width_ms=4, \n", + " spike_height_um=20, \n", + " with_colorbar=True, \n", + " backend=\"ipywidgets\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "7f84050f-d495-4c88-9a9c-ac8b169e4ff7", + "metadata": {}, + "source": [ + "## Unit quality metrics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"1e6abfd0-23d2-4d20-bdc0-18653aad0e99", + "metadata": {}, + "outputs": [], + "source": [ + "from spikeinterface.postprocessing import compute_principal_components\n", + "from spikeinterface.qualitymetrics import compute_quality_metrics, get_quality_metric_list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d906c4b1-dc32-4894-bcc0-ed2382a08fca", + "metadata": {}, + "outputs": [], + "source": [ + "get_quality_metric_list()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c15fe736-d828-486e-96bc-21b0f1e990a9", + "metadata": {}, + "outputs": [], + "source": [ + "metrics = compute_quality_metrics(analyzer, metric_names=[\"snr\", \"isi_violation\", \"nearest_neighbor\", \"firing_rate\", 'presence_ratio', 'amplitude_cutoff'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a4d0fcd5-eb7b-4a1f-999a-5ec2e4db3d9b", + "metadata": {}, + "outputs": [], + "source": [ + "print (metrics)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9b4dda3-81c6-4158-b568-d90645db1c53", + "metadata": {}, + "outputs": [], + "source": [ + "keep_mask = (metrics[\"amplitude_cutoff\"] < 1e-6)\n", + "print(keep_mask)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "510e15fb-115d-46c5-be33-f3baa4c6d089", + "metadata": {}, + "outputs": [], + "source": [ + "keep_unit_ids = keep_mask[keep_mask].index.values\n", + "keep_unit_ids = [unit_id for unit_id in keep_unit_ids]\n", + "print(keep_unit_ids)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0c0af91-b06b-40bc-91d5-0e10cf0703f4", + "metadata": {}, + "outputs": [], + "source": [ + "from scipy.ndimage.filters import gaussian_filter1d\n", + "plt.rcParams.update({'font.size': 14})\n", + "\n", + "def plot_metric(data, bins, x_axis_label, color, max_value=-1):\n", + " \n", + " h, b = np.histogram(data, bins=bins, density=True)\n", + "\n", + " x = b[:-1]\n", + " y = gaussian_filter1d(h, 1)\n", + "\n", + " plt.plot(x, y, color=color)\n", + " plt.xlabel(x_axis_label)\n", + " plt.gca().get_yaxis().set_visible(False)\n", + " [plt.gca().spines[loc].set_visible(False) for loc in ['right', 'top', 'left']]\n", + " if max_value < np.max(y) * 1.1:\n", + " max_value = np.max(y) * 1.1\n", + " plt.ylim([0, max_value])\n", + " \n", + " return max_value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99de07c7-e341-41ab-9795-9c5516e9a888", + "metadata": {}, + "outputs": [], + "source": [ + "ss.get_default_sorter_params('spykingcircus2')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1e0f0a4-2a70-4840-a467-233fc57220b8", + "metadata": {}, + "outputs": [], + "source": [ + "sorter_spykingcircus2 = ss.run_sorter(\n", + " sorter_name='spykingcircus2',\n", + " recording = rec_fil,\n", + " verbose=True,\n", + " folder = 'output/spykingcircus2',\n", + " remove_existing_folder=True ## CUIDADO, SOBREESCRIBE LOS DATOS EN CASO DE HABER UNA CARPETA, PARA DESHABILITAR PONER =FALSE\n", + " )" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/sorterpipeline_colabtest.ipynb b/sorterpipeline_colabtest.ipynb new file mode 
100644 index 0000000..4417a57 --- /dev/null +++ b/sorterpipeline_colabtest.ipynb @@ -0,0 +1,879 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "1a16521b-14b7-4381-9cf3-41532d427539", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Dependencias.\n", + "import scipy.io\n", + "\n", + "%matplotlib widget\n", + "import h5py\n", + "import os\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import probeinterface as pi\n", + "import spikeinterface as si\n", + "import spikeinterface.widgets as sw\n", + "import spikeinterface.sorters as ss\n", + "import spikeinterface.exporters as exp\n", + "import seaborn as sns # <- Para graficos estadisticos\n", + "from preprocessing_functions import read_rhd, get_recording, check_concatenation, process_artifacts, espigas, sorting_analyzer, create_folders\n", + "\n", + "from pathlib import Path\n", + "from spikeinterface.extractors import read_intan\n", + "import spikeinterface.preprocessing as prep\n", + "from probeinterface import Probe, ProbeGroup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d3a9305-152a-4a31-9f23-0fcc756c85ef", + "metadata": {}, + "outputs": [], + "source": [ + "# global kwargs for parallel computing\n", + "job_kwargs = dict(\n", + " n_jobs=-1,\n", + " chunk_duration='1s',\n", + " progress_bar=True,\n", + ")\n", + "\n", + "si.set_global_job_kwargs(**job_kwargs)" + ] + }, + { + "cell_type": "markdown", + "id": "f4140b07-598a-453e-b6a3-b21265267b0d", + "metadata": {}, + "source": [ + "## Lectura y Preprocesado.\n", + "el procesado de datos se realiza de la siguiente forma: \n", + "```mermaid\n", + "flowchart TD\n", + " A[\"Datos crudos (Raw)\"] --> B[\"Filtro pasa banda (500 - 9000Hz)\"]\n", + " B --> C[\"Incorporacion de electrodos y definición de grupos\"]\n", + " C --> D[\"Definición de canales malos\"]\n", + " D --> E[\"Remoción de artefactos\"]\n", + " E --> F[\"`Guardado en formato binario (carpeta **preprocess**)`\"]\n", + " \n", + "```\n", + "Algunas opciones comunes de group_mode en SpikeInterface incluyen:\n", + "\n", + "by_probe: Agrupa los canales o unidades según el probe (sonda) al que pertenecen. Esto es útil si se están utilizando varias sondas en el experimento.\n", + "\n", + "by_shank: Agrupa los canales según el shank (tallo) dentro de un probe. Esto se usa cuando un probe tiene múltiples shanks.\n", + "\n", + "by_electrode_group: Agrupa los canales por el grupo de electrodos. Esto permite analizar o procesar datos por grupos predefinidos de electrodos.\n", + "\n", + "by_channel: Trata cada canal por separado, sin agruparlos. Es útil cuando se quiere analizar cada canal de manera independiente.\n", + "\n", + "by_unit: Agrupa los datos según unidades individuales de spikes. Esto es útil cuando se quiere analizar características a nivel de unidades de spikes individuales en lugar de a nivel de canales.\n", + "\n", + "all: Agrupa todos los canales o unidades en un solo grupo, tratándolos como un conjunto único. Esto puede ser útil para obtener un análisis global del conjunto de datos." 
+ ] + }, + { + "cell_type": "markdown", + "id": "79e7d9ac-459c-4825-a684-8f24302c774f", + "metadata": {}, + "source": [ + "### Defición de parametros.\n", + "para el ejemplo, se almacenan todos los pasos intermedios, estos " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32e68d41-2dfe-49a7-a3a3-243d33ce3d90", + "metadata": {}, + "outputs": [], + "source": [ + "# Configuración de archivos\n", + "probegroup_file = 'probes/anillo_probe.json' # Archivo de configuración del probegroup\n", + "\n", + "# Configuración de carpetas de procesamiento\n", + "preprocess_folder = Path('preprocess/')\n", + "\n", + "# Archivos de Excel para la información de registros\n", + "# como inicio se recomienda solo poner un dia en el excel de información de archivos, este es el punto de inicio para hacer una maquina de salchichas para todos los días del animal\n", + "excel_file_maze = r'C:\\Users\\Labcn\\OneDrive - Universidad Católica de Chile\\sorting_anillo\\input_files\\PF07\\Maze\\informacion_archivos.xlsx'\n", + "excel_file_sleep = r'C:\\Users\\Labcn\\OneDrive - Universidad Católica de Chile\\sorting_anillo\\input_files\\PF07\\Sleep\\informacion_archivos.xlsx'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92ff5fcb-ac0f-4eed-b5e7-874c50813815", + "metadata": {}, + "outputs": [], + "source": [ + "# Procesar archivos\n", + "record_maze = get_recording(excel_file_maze, probegroup_file)\n", + "record_sleep = get_recording(excel_file_sleep, probegroup_file)\n", + "recording = check_concatenation(record_maze, record_sleep)\n", + "\n", + "# Crear carpeta para picos para sorting manual si es necesario\n", + "peak_folder = Path('output/manual/peaks')\n", + "# peak_folder.mkdir(exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "id": "ea9b4c41-9562-4cd9-af7e-e714c0f37931", + "metadata": {}, + "source": [ + "### Inspeccion de los resultados.\n", + "Revise:\n", + "Se haya realizado el procedimiento de concatenado.\n", + "Se removieron los artefactos.\n", + "Nombres de los canales.\n", + "\n", + "importante: Con el fin de mejorar la visualización, los datos no estan escalados. Para presentarlos escalados (uV) defina return_scaled=True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3fe4dda9-ffe2-4489-a77c-bc39b72c87df", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# plot and check spikes\n", + "mode = \"line\"\n", + "w = sw.plot_traces(recording=recording, \n", + " mode=mode, time_range=(0, 105), color_groups=True, return_scaled=False,\n", + " show_channel_ids=True, order_channel_by_depth=False, backend=\"ephyviewer\")\n", + "\n", + "recording" + ] + }, + { + "cell_type": "markdown", + "id": "9bb98aae-9c2f-4826-b140-93c451c02e06", + "metadata": {}, + "source": [ + "### Eliminacion de canales.\n", + "indique los canales a eliminar, para ello utilice el nombre (\"Channel ID\") que se le da en Intan. Observe el ejemplo.\n", + "Confirme que se haya realizado la eliminación ejecutando la celda anterior." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8502da2c-dd2d-4e7a-9574-9d1f1f18f1bc", + "metadata": {}, + "outputs": [], + "source": [ + "bad_channels =[\"A-015\", \"A-016\"]\n", + "recording=recording.remove_channels(bad_channels)\n" + ] + }, + { + "cell_type": "markdown", + "id": "4f5a2412-8fde-4881-be0e-175d12bda873", + "metadata": {}, + "source": [ + "### Guardar archivos preprocesados\n", + "Guardar archivos preprocesados como binarios." 
+ ] + }, + { + "cell_type": "markdown", + "id": "84dc9569-c570-4dcb-b715-67e97712f065", + "metadata": {}, + "source": [ + "#### Guardar el registro completo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75e2ede9-d6ef-4477-9553-cba613fd6fa0", + "metadata": {}, + "outputs": [], + "source": [ + "recording.save(folder=preprocess_folder, overwrite=True, **job_kwargs)\n", + " # rec_artifacts.save(folder=preprocess_artifacts, **job_kwargs) # ejercicio para guardar registro sin la eliminaciòn de artefactos." + ] + }, + { + "cell_type": "markdown", + "id": "cf93af84-d1e6-4a13-aed0-59f59f771406", + "metadata": {}, + "source": [ + "#### Guardar un segmento del registro" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a338f139-2612-4936-ae1c-2702fd8031b1", + "metadata": {}, + "outputs": [], + "source": [ + "sliced_rec=recording.time_slice(start_time=0, end_time=2000)\n", + "sliced_rec.save(format='binary', folder=Path('preprocess/slic3d'), overwrite=True, **job_kwargs)" + ] + }, + { + "cell_type": "markdown", + "id": "42fc8a15-6918-4408-9e5f-0386978bf96b", + "metadata": {}, + "source": [ + "#### Leer el registro guardado" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b962549a-ae2b-4900-9b3e-202a30b952af", + "metadata": {}, + "outputs": [], + "source": [ + "recording = si.load_extractor(preprocess_folder)" + ] + }, + { + "cell_type": "markdown", + "id": "03924393-f90e-479d-a3f5-f2f51cb305f3", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "## Estimar ruido\n", + "\n", + "Estimate noise for each channel using MAD methods (median absolute deviation). You can use standard deviation with method=”std”\n", + "Internally it samples some chunk across segment. And then, it use MAD estimator (more robust than STD)\n", + "\n", + "detalle para el calculo de chunks\n", + "```python\n", + " random_chunks = get_random_data_chunks(recording, return_scaled=return_scaled, **random_chunk_kwargs)\n", + "```\n", + " \n", + "detalle del codigo para calcular el ruido:\n", + "```python\n", + " if method == \"mad\":\n", + " med = np.median(random_chunks, axis=0, keepdims=True)\n", + " # hard-coded so that core doesn't depend on scipy\n", + " noise_levels = np.median(np.abs(random_chunks - med), axis=0) / 0.6744897501960817\n", + "```\n", + " \n", + "ref, API: https://github.com/SpikeInterface/spikeinterface/blob/a9b4c34200be91a589ad145c976c5a177ce6746e/src/spikeinterface/core/recording_tools.py#L82C5-L100\n", + "\n", + "interesante discusión en:\n", + "https://github.com/SpikeInterface/spikeinterface/issues/1681" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97e36ce4-3f4d-49ce-8408-6a4f6e8e8008", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Calcular los niveles de ruido\n", + "noise_levels = si.get_noise_levels(recording, return_scaled=True, method=\"mad\")\n", + "\n", + "# Crear una figura más grande\n", + "fig, ax1 = plt.subplots(figsize=(10, 6))\n", + "\n", + "# Histograma en el primer eje (ax1)\n", + "color_hist = 'skyblue'\n", + "ax1.hist(noise_levels, bins='auto', color=color_hist, alpha=0.7, edgecolor='black')\n", + "ax1.set_xlabel('Noise Level (MAD)', fontsize=14)\n", + "ax1.set_ylabel('Frequency', fontsize=14, color=color_hist)\n", + "ax1.tick_params(axis='y', labelcolor=color_hist)\n", + "\n", + "# Calcular y agregar línea vertical para la media en el primer eje\n", + "mean_noise = np.mean(noise_levels)\n", + "ax1.axvline(mean_noise, color='blue', linestyle='dashed', 
linewidth=2, label=f'Mean: {mean_noise:.2f}')\n", + "\n", + "# Calcular y agregar línea para la desviación estándar en el primer eje\n", + "std_noise = np.std(noise_levels)\n", + "ax1.axvline(mean_noise + std_noise, color='green', linestyle='dashed', linewidth=1, label=f'Std: {std_noise:.2f}')\n", + "ax1.axvline(mean_noise - std_noise, color='green', linestyle='dashed', linewidth=1)\n", + "\n", + "# Agregar leyenda al primer eje\n", + "ax1.legend()\n", + "\n", + "# Crear el segundo eje para la densidad (ax2) que comparte el mismo eje x\n", + "ax2 = ax1.twinx()\n", + "color_density = 'red'\n", + "\n", + "# Agregar línea de densidad con seaborn en el segundo eje\n", + "sns.kdeplot(noise_levels, color=color_density, linewidth=2, ax=ax2)\n", + "ax2.set_ylabel('Density', fontsize=14, color=color_density)\n", + "ax2.tick_params(axis='y', labelcolor=color_density)\n", + "\n", + "# Agregar título principal\n", + "plt.title('Noise Levels Across Channels', fontsize=16, fontweight='bold')\n", + "\n", + "# Mejorar los ticks del eje x\n", + "ax1.tick_params(axis='x', labelsize=12)\n", + "ax2.tick_params(axis='y', labelsize=12)\n", + "\n", + "# Agregar gridlines para mayor claridad\n", + "ax1.grid(True, linestyle='--', alpha=0.6)\n", + "\n", + "# Ajustar el layout para evitar que se superpongan elementos\n", + "plt.tight_layout()\n", + "\n", + "# Mostrar la gráfica\n", + "plt.show()\n", + "noise_levels" + ] + }, + { + "cell_type": "markdown", + "id": "07b188a3-c16f-4a62-b126-d6b77bfbdbbf", + "metadata": {}, + "source": [ + "## Detectar peaks\n", + "La detección de peaks suele ser el primer paso en la clasificación de señales y consiste en encontrar peaks en los registros que podrían ser spikes reales.”\n", + "\n", + "Different methods are available with the method argument:\n", + "\n", + "‘by_channel’ (default): peaks are detected separately for each channel\n", + "‘locally_exclusive’ (requires numba): peaks on neighboring channels within a certain radius are excluded (not counted multiple times)\n", + "‘by_channel_torch’ (requires torch): pytorch implementation (GPU-compatible) that uses max pooling for time deduplication\n", + "‘locally_exclusive_torch’ (requires torch): pytorch implementation (GPU-compatible) that uses max pooling for space-time deduplication\n", + "\n", + "**si quiere cambiar el método, debes eliminar el archivo peaks.npy antes de calcular**\n", + "\n", + "[lista de parametros API](https://spikeinterface.readthedocs.io/en/latest/api.html#spikeinterface.sortingcomponents.peak_detection.detect_peaks) \n", + "referencias sobre el paso de [deteccion de espigas](https://spikeinterface.readthedocs.io/en/latest/modules/sortingcomponents.html#peak-detection)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01a54a48-ccb6-4aca-bbcc-b1b1bd5cabe6", + "metadata": {}, + "outputs": [], + "source": [ + "peak_folder = Path('output/manual/peaks')\n", + "peak_folder.mkdir(parents=True, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8167e03-9f1e-44c9-a2ae-1cb33152cac0", + "metadata": {}, + "outputs": [], + "source": [ + "from spikeinterface.sortingcomponents.peak_detection import detect_peaks\n", + "if not (peak_folder / 'peaks.npy').exists():\n", + " peaks = detect_peaks(\n", + " recording=recording,\n", + " method='locally_exclusive',\n", + " peak_sign='neg',\n", + " detect_threshold=5, # Threshold, in median absolute deviations (MAD), to use to detect peaks\n", + " noise_levels=noise_levels,\n", + " **job_kwargs,\n", + " )\n", + " 
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b062afc8-bd5e-40af-9708-b60b70fe4149",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from spikeinterface.sortingcomponents.peak_localization import localize_peaks\n",
+    "\n",
+    "# center-of-mass method over a 0.9 ms window: 0.3 ms before and 0.6 ms after the peak\n",
+    "\n",
+    "if not (peak_folder / 'peak_locations_center_of_mass.npy').exists():\n",
+    "    peak_locations = localize_peaks(\n",
+    "        recording=recording,\n",
+    "        peaks=peaks,\n",
+    "        ms_before=0.3,\n",
+    "        ms_after=0.6,\n",
+    "        method='center_of_mass',\n",
+    "        **job_kwargs,\n",
+    "    )\n",
+    "    np.save(peak_folder / 'peak_locations_center_of_mass.npy', peak_locations)\n",
+    "peak_locations = np.load(peak_folder / 'peak_locations_center_of_mass.npy')\n",
+    "print(peak_locations.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1417dac5-b908-4e31-9dcd-7f7c943f08b4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# monopolar_triangulation method, same 0.9 ms window\n",
+    "\n",
+    "if not (peak_folder / 'peak_locations_monopolar_triangulation_legacy.npy').exists():\n",
+    "    peak_locations = localize_peaks(\n",
+    "        recording=recording,\n",
+    "        peaks=peaks,\n",
+    "        ms_before=0.3,\n",
+    "        ms_after=0.6,\n",
+    "        method='monopolar_triangulation',\n",
+    "        **job_kwargs,\n",
+    "    )\n",
+    "    np.save(peak_folder / 'peak_locations_monopolar_triangulation_legacy.npy', peak_locations)\n",
+    "peak_locations = np.load(peak_folder / 'peak_locations_monopolar_triangulation_legacy.npy')\n",
+    "print(peak_locations.shape)"
+   ]
+  },
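+  {
+   "cell_type": "markdown",
+   "id": "2a4c6e8b-0d1f-4a3b-9c5d-7e9f1a3b5d19",
+   "metadata": {},
+   "source": [
+    "To compare the two localization methods visually (an illustrative addition, not part of the original pipeline), the estimated positions can be scattered on the probe plane. This assumes the saved arrays expose `x` and `y` fields, as `localize_peaks` returns."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6f8a0c2e-4b5d-4f7e-8a1b-3c5e7f9a1c19",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Illustrative sketch: localized peak positions on the probe plane.\n",
+    "# Assumes the arrays have 'x' and 'y' fields, as returned by localize_peaks.\n",
+    "locs_com = np.load(peak_folder / 'peak_locations_center_of_mass.npy')\n",
+    "locs_mono = np.load(peak_folder / 'peak_locations_monopolar_triangulation_legacy.npy')\n",
+    "\n",
+    "fig, axs = plt.subplots(1, 2, figsize=(10, 6), sharex=True, sharey=True)\n",
+    "for ax, locs, title in zip(axs, (locs_com, locs_mono), ('center_of_mass', 'monopolar_triangulation')):\n",
+    "    ax.scatter(locs['x'], locs['y'], s=1, alpha=0.2)\n",
+    "    ax.set_title(title)\n",
+    "    ax.set_xlabel('x (um)')\n",
+    "axs[0].set_ylabel('y (um)')\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  },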
+  {
+   "cell_type": "markdown",
+   "id": "c99687fb-cd6a-497c-8cf7-deef7970e281",
+   "metadata": {},
+   "source": [
+    "## Run a sorter\n",
+    "Run Kilosort.\n",
+    "\n",
+    "### Check the input parameters (configuration)\n",
+    "Kilosort-specific parameters (passable as kwargs): \n",
+    "https://kilosort.readthedocs.io/en/latest/parameters.html \n",
+    "https://github.com/MouseLand/Kilosort/blob/main/kilosort/parameters.py"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "00e7c619-a581-47a0-a82e-b4e13ffb0333",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Free torch GPU memory before running the sorter.\n",
+    "import torch\n",
+    "torch.cuda.empty_cache()\n",
+    "\n",
+    "ss.get_default_sorter_params('kilosort4')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a20fcc41-2af7-47e2-a96b-6639d6d307e4",
+   "metadata": {},
+   "source": [
+    "## Kilosort 4"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c510f7bd-13e2-46e7-8d12-652fceb5eb8c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "\n",
+    "base_name = 'Rev9'\n",
+    "\n",
+    "# Single sorting\n",
+    "sorter_folder, analyzer_folder, phy_output_folder = create_folders(base_name)\n",
+    "\n",
+    "# Group sorting\n",
+    "group_sorter_folder, group_analyzer_folder, group_phy_output_folder = create_folders(base_name, group=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "968ba2aa-d35c-4f67-9798-5e4f07c85f43",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Dictionary of modified parameters to pass to the sorter.\n",
+    "params_kilosort4 = {\n",
+    "    ## MAIN_PARAMETERS\n",
+    "    'batch_size': 60000,\n",
+    "    'nblocks': 0,\n",
+    "    'Th_universal': 7,\n",
+    "    'Th_learned': 6,\n",
+    "    ## Preprocessing\n",
+    "    'artifact_threshold': 1000,\n",
+    "    ## SPIKE DETECTION\n",
+    "    'min_template_size': 10,\n",
+    "    'template_sizes': 5,\n",
+    "    'n_pcs': 6,\n",
+    "    'templates_from_data': True,\n",
+    "    'nearest_chans': 4,\n",
+    "    'nearest_templates': 15,\n",
+    "    'max_channel_distance': 60,\n",
+    "    'Th_single_ch': 4,\n",
+    "    ## Clustering\n",
+    "    # 'acg_threshold': 0.15,\n",
+    "    # 'cluster_downsampling': 10,\n",
+    "    ## extras\n",
+    "    # 'binning_depth': 4,\n",
+    "    # 'drift_smoothing': [0.3, 0.3, 0.3],\n",
+    "    'skip_kilosort_preprocessing': False,\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "40df9d7e-f433-4a3b-a33e-08abfbc2acf2",
+   "metadata": {},
+   "source": [
+    "### Bulk sorting"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a22ef0bf-4e5f-49ab-96b7-9b442dfb8e59",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sorter = ss.run_sorter(\n",
+    "    sorter_name='kilosort4',\n",
+    "    recording=recording,\n",
+    "    verbose=True,\n",
+    "    folder=sorter_folder,\n",
+    "    remove_existing_folder=True,  ## WARNING: OVERWRITES THE DATA IF THE FOLDER ALREADY EXISTS; SET TO False TO DISABLE\n",
+    "    **params_kilosort4)\n",
+    "\n",
+    "num_clusters, total_spikes = espigas(sorter)\n",
+    "print(f\"Total number of clusters: {num_clusters}\")\n",
+    "print(f\"Total number of spikes: {total_spikes}\")\n",
+    "\n",
+    "analyzer = sorting_analyzer(sorter, recording, output_folder=analyzer_folder)\n",
+    "\n",
+    "exp.export_to_phy(sorting_analyzer=analyzer,\n",
+    "                  remove_if_exists=True,\n",
+    "                  copy_binary=True,\n",
+    "                  output_folder=phy_output_folder)"
+   ]
+  },
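+  {
+   "cell_type": "markdown",
+   "id": "1b3d5f7a-9c0e-4b2d-8e4f-6a8c0e2b4d20",
+   "metadata": {},
+   "source": [
+    "If this run has already completed, the sorting can be reloaded from the sorter folder instead of being recomputed. A minimal sketch, assuming `sorter_folder` points at a finished kilosort4 run and that `read_sorter_folder` is available in your SpikeInterface version:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7c9e1a3b-5d6f-4c8e-9f01-2b4d6f8a0c20",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hedged sketch: reload a previously computed sorting (e.g. after a kernel\n",
+    "# restart) instead of re-running the sorter.\n",
+    "sorting_reloaded = ss.read_sorter_folder(sorter_folder)\n",
+    "print(sorting_reloaded)"
+   ]
+  },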
+  {
+   "cell_type": "markdown",
+   "id": "d9f26e72-5f02-405b-bc6a-e6c956704894",
+   "metadata": {},
+   "source": [
+    "### Sorting by group: automatic splitting"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3a22a06f-c7b2-4341-888a-f155a0cb5119",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "group_sorter = ss.run_sorter_by_property(\n",
+    "    sorter_name='kilosort4',\n",
+    "    recording=recording,\n",
+    "    grouping_property='group',\n",
+    "    verbose=True,\n",
+    "    folder=group_sorter_folder,\n",
+    "    remove_existing_folder=True,  ## WARNING: OVERWRITES THE DATA IF THE FOLDER ALREADY EXISTS; SET TO False TO DISABLE\n",
+    "    **params_kilosort4)\n",
+    "\n",
+    "num_clusters, total_spikes = espigas(group_sorter)\n",
+    "print(f\"Total number of clusters: {num_clusters}\")\n",
+    "print(f\"Total number of spikes: {total_spikes}\")\n",
+    "\n",
+    "group_analyzer = sorting_analyzer(group_sorter, recording, output_folder=group_analyzer_folder)\n",
+    "\n",
+    "exp.export_to_phy(sorting_analyzer=group_analyzer,\n",
+    "                  remove_if_exists=True,\n",
+    "                  copy_binary=True,\n",
+    "                  output_folder=group_phy_output_folder)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4258b7de-89dd-4bee-baee-e69824680d32",
+   "metadata": {},
+   "source": [
+    "### Sorting by group: manual splitting (work in progress)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "770a2908-a11d-43d5-8652-6bdcf8eeac78",
+   "metadata": {},
+   "source": [
+    "The goal of this approach is to improve spike-sorting performance by splitting the process across channel groups or regions of the recording. Sorting in parts substantially reduces the memory demand and GPU usage of each run, which is especially useful for large recordings or when GPU capacity is limited.\n",
+    "\n",
+    "By splitting the recording into groups, the sorter operates on smaller fragments, which makes system resources easier to manage and avoids \"out of memory\" errors. After sorting each group, the results are combined, allowing a global analysis and export without sacrificing efficiency during the initial sorting stage.\n",
+    "\n",
+    "This approach balances resource usage, optimizing the GPU without compromising the quality of the final analysis."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0e76555d-dc7a-43f7-af64-cd1be41e235a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Split the recording by group\n",
+    "split_recording = recording.split_by(\"group\")\n",
+    "\n",
+    "# Dictionary holding the sorting result of each group\n",
+    "sortings = {}\n",
+    "\n",
+    "# Run the sorter on each group\n",
+    "for group, sub_recording in split_recording.items():\n",
+    "    sorting = ss.run_sorter(\n",
+    "        sorter_name='kilosort4',\n",
+    "        recording=sub_recording,  # use the sub-recording of this group\n",
+    "        folder=f\"fKS4_group{group}\"\n",
+    "    )\n",
+    "    sortings[group] = sorting  # store the sorter result for this group\n",
+    "\n",
+    "# Combine the per-group results: aggregate_units merges the units of\n",
+    "# sortings obtained on different channel groups into a single sorting\n",
+    "combined_sorting = si.aggregate_units(list(sortings.values()))\n",
+    "\n",
+    "# Cluster and spike counts for the combined sorting\n",
+    "num_clusters, total_spikes = espigas(combined_sorting)\n",
+    "print(f\"Total number of clusters: {num_clusters}\")\n",
+    "print(f\"Total number of spikes: {total_spikes}\")\n",
+    "\n",
+    "# Global analysis on the combined sorting\n",
+    "combined_analyzer = sorting_analyzer(combined_sorting, recording, output_folder='combined_analyzer_folder')\n",
+    "\n",
+    "# Export the global results to Phy\n",
+    "exp.export_to_phy(sorting_analyzer=combined_analyzer,\n",
+    "                  remove_if_exists=True,\n",
+    "                  copy_binary=True,\n",
+    "                  output_folder='combined_phy_output_folder')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9a6c810e-a8dc-4cca-96de-abe8b43ea875",
+   "metadata": {},
+   "source": [
+    "## Graphical inspection"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2da393de-3376-4b3a-89f3-c35f19adb28b",
+   "metadata": {},
+   "source": [
+    "### Load an analysis"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "81cbc069-2452-4ab6-8b24-6dfda650256b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "folder = 'output/AN/kilosort/analyzer_Rev9'\n",
+    "analyzer = si.load_sorting_analyzer(folder)"
+   ]
+  },
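+  {
+   "cell_type": "markdown",
+   "id": "4d6f8a0c-2e3f-4d5e-8b12-3c5e7a9b1d21",
+   "metadata": {},
+   "source": [
+    "As a quick look at the loaded units (an illustrative addition, not part of the original pipeline), their templates can be plotted with the widgets module. This assumes the analyzer was saved with the `random_spikes`, `waveforms` and `templates` extensions computed; if not, they can be computed first as shown in the commented line."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8e0a2c4e-6f7a-4b9c-8d01-5e7f9b1d3f21",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Illustrative sketch: inspect the unit templates of the loaded analyzer.\n",
+    "import spikeinterface.widgets as sw\n",
+    "\n",
+    "# If the extensions are missing, compute them first (assumption: the\n",
+    "# analyzer still has access to its recording):\n",
+    "# analyzer.compute(['random_spikes', 'waveforms', 'templates'])\n",
+    "\n",
+    "sw.plot_unit_templates(analyzer, backend='matplotlib')"
+   ]
+  },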
\n", + " time_range=None, \n", + " unit_colors=None, \n", + " sparsity=None, \n", + " mode='auto', \n", + " return_scaled=False, \n", + " cmap='RdBu', \n", + " show_channel_ids=False, \n", + " color_groups=False, \n", + " color=None, \n", + " clim=None, \n", + " tile_size=512, \n", + " seconds_per_row=0.2, \n", + " scale=1, \n", + " spike_width_ms=4, \n", + " spike_height_um=20, \n", + " with_colorbar=True, \n", + " backend=\"ipywidgets\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "7f84050f-d495-4c88-9a9c-ac8b169e4ff7", + "metadata": {}, + "source": [ + "## Unit quality metrics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e6abfd0-23d2-4d20-bdc0-18653aad0e99", + "metadata": {}, + "outputs": [], + "source": [ + "from spikeinterface.postprocessing import compute_principal_components\n", + "from spikeinterface.qualitymetrics import compute_quality_metrics, get_quality_metric_list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d906c4b1-dc32-4894-bcc0-ed2382a08fca", + "metadata": {}, + "outputs": [], + "source": [ + "get_quality_metric_list()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c15fe736-d828-486e-96bc-21b0f1e990a9", + "metadata": {}, + "outputs": [], + "source": [ + "metrics = compute_quality_metrics(analyzer, metric_names=[\"snr\", \"isi_violation\", \"nearest_neighbor\", \"firing_rate\", 'presence_ratio', 'amplitude_cutoff'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a4d0fcd5-eb7b-4a1f-999a-5ec2e4db3d9b", + "metadata": {}, + "outputs": [], + "source": [ + "print (metrics)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9b4dda3-81c6-4158-b568-d90645db1c53", + "metadata": {}, + "outputs": [], + "source": [ + "keep_mask = (metrics[\"amplitude_cutoff\"] < 1e-6)\n", + "print(keep_mask)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "510e15fb-115d-46c5-be33-f3baa4c6d089", + "metadata": {}, + "outputs": [], + "source": [ + "keep_unit_ids = keep_mask[keep_mask].index.values\n", + "keep_unit_ids = [unit_id for unit_id in keep_unit_ids]\n", + "print(keep_unit_ids)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0c0af91-b06b-40bc-91d5-0e10cf0703f4", + "metadata": {}, + "outputs": [], + "source": [ + "from scipy.ndimage.filters import gaussian_filter1d\n", + "plt.rcParams.update({'font.size': 14})\n", + "\n", + "def plot_metric(data, bins, x_axis_label, color, max_value=-1):\n", + " \n", + " h, b = np.histogram(data, bins=bins, density=True)\n", + "\n", + " x = b[:-1]\n", + " y = gaussian_filter1d(h, 1)\n", + "\n", + " plt.plot(x, y, color=color)\n", + " plt.xlabel(x_axis_label)\n", + " plt.gca().get_yaxis().set_visible(False)\n", + " [plt.gca().spines[loc].set_visible(False) for loc in ['right', 'top', 'left']]\n", + " if max_value < np.max(y) * 1.1:\n", + " max_value = np.max(y) * 1.1\n", + " plt.ylim([0, max_value])\n", + " \n", + " return max_value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99de07c7-e341-41ab-9795-9c5516e9a888", + "metadata": {}, + "outputs": [], + "source": [ + "ss.get_default_sorter_params('spykingcircus2')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1e0f0a4-2a70-4840-a467-233fc57220b8", + "metadata": {}, + "outputs": [], + "source": [ + "sorter_spykingcircus2 = ss.run_sorter(\n", + " sorter_name='spykingcircus2',\n", + " recording = rec_fil,\n", + " verbose=True,\n", + " folder = 'output/spykingcircus2',\n", + " 
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e0c0af91-b06b-40bc-91d5-0e10cf0703f4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from scipy.ndimage import gaussian_filter1d  # scipy.ndimage.filters is deprecated\n",
+    "plt.rcParams.update({'font.size': 14})\n",
+    "\n",
+    "def plot_metric(data, bins, x_axis_label, color, max_value=-1):\n",
+    "    # Smoothed density-style curve of a quality-metric distribution.\n",
+    "    h, b = np.histogram(data, bins=bins, density=True)\n",
+    "\n",
+    "    x = b[:-1]\n",
+    "    y = gaussian_filter1d(h, 1)\n",
+    "\n",
+    "    plt.plot(x, y, color=color)\n",
+    "    plt.xlabel(x_axis_label)\n",
+    "    plt.gca().get_yaxis().set_visible(False)\n",
+    "    for loc in ['right', 'top', 'left']:\n",
+    "        plt.gca().spines[loc].set_visible(False)\n",
+    "    if max_value < np.max(y) * 1.1:\n",
+    "        max_value = np.max(y) * 1.1\n",
+    "    plt.ylim([0, max_value])\n",
+    "\n",
+    "    return max_value"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "99de07c7-e341-41ab-9795-9c5516e9a888",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ss.get_default_sorter_params('spykingcircus2')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d1e0f0a4-2a70-4840-a467-233fc57220b8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sorter_spykingcircus2 = ss.run_sorter(\n",
+    "    sorter_name='spykingcircus2',\n",
+    "    recording=recording,\n",
+    "    verbose=True,\n",
+    "    folder='output/spykingcircus2',\n",
+    "    remove_existing_folder=True  ## WARNING: OVERWRITES THE DATA IF THE FOLDER ALREADY EXISTS; SET TO False TO DISABLE\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}