diff --git a/docs/build/.buildinfo b/docs/build/.buildinfo index 3162100..99ec388 100644 --- a/docs/build/.buildinfo +++ b/docs/build/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 88dee1dfb417bc401516c5f62c9b42d9 +config: 6ef85c61a07ec8e9f0ed07676e851c59 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/docs/build/.doctrees/cpg.doctree b/docs/build/.doctrees/cpg.doctree index 9bbdc05..50367a3 100644 Binary files a/docs/build/.doctrees/cpg.doctree and b/docs/build/.doctrees/cpg.doctree differ diff --git a/docs/build/.doctrees/dust.doctree b/docs/build/.doctrees/dust.doctree index 01dfeed..6534126 100644 Binary files a/docs/build/.doctrees/dust.doctree and b/docs/build/.doctrees/dust.doctree differ diff --git a/docs/build/.doctrees/environment.pickle b/docs/build/.doctrees/environment.pickle index 5e405ff..aff86c5 100644 Binary files a/docs/build/.doctrees/environment.pickle and b/docs/build/.doctrees/environment.pickle differ diff --git a/docs/build/.doctrees/eponine.doctree b/docs/build/.doctrees/eponine.doctree index dd9e628..45af0eb 100644 Binary files a/docs/build/.doctrees/eponine.doctree and b/docs/build/.doctrees/eponine.doctree differ diff --git a/docs/build/.doctrees/genblast.doctree b/docs/build/.doctrees/genblast.doctree index 770353a..205991a 100644 Binary files a/docs/build/.doctrees/genblast.doctree and b/docs/build/.doctrees/genblast.doctree differ diff --git a/docs/build/.doctrees/index.doctree b/docs/build/.doctrees/index.doctree index 7a0baca..83ba296 100644 Binary files a/docs/build/.doctrees/index.doctree and b/docs/build/.doctrees/index.doctree differ diff --git a/docs/build/.doctrees/minimap.doctree b/docs/build/.doctrees/minimap.doctree index ed59759..b2f594c 100644 Binary files a/docs/build/.doctrees/minimap.doctree and b/docs/build/.doctrees/minimap.doctree differ diff --git 
a/docs/build/.doctrees/red.doctree b/docs/build/.doctrees/red.doctree index 6a6bea6..8fa9ece 100644 Binary files a/docs/build/.doctrees/red.doctree and b/docs/build/.doctrees/red.doctree differ diff --git a/docs/build/.doctrees/repeatmasker.doctree b/docs/build/.doctrees/repeatmasker.doctree index 81fbedb..6694fc6 100644 Binary files a/docs/build/.doctrees/repeatmasker.doctree and b/docs/build/.doctrees/repeatmasker.doctree differ diff --git a/docs/build/.doctrees/scallop.doctree b/docs/build/.doctrees/scallop.doctree index 1e971d4..35ed652 100644 Binary files a/docs/build/.doctrees/scallop.doctree and b/docs/build/.doctrees/scallop.doctree differ diff --git a/docs/build/.doctrees/star.doctree b/docs/build/.doctrees/star.doctree index cd91106..29253a8 100644 Binary files a/docs/build/.doctrees/star.doctree and b/docs/build/.doctrees/star.doctree differ diff --git a/docs/build/.doctrees/stringtie.doctree b/docs/build/.doctrees/stringtie.doctree index 3d594fd..c5bb7b3 100644 Binary files a/docs/build/.doctrees/stringtie.doctree and b/docs/build/.doctrees/stringtie.doctree differ diff --git a/docs/build/.doctrees/trf.doctree b/docs/build/.doctrees/trf.doctree index b200346..ba2a77f 100644 Binary files a/docs/build/.doctrees/trf.doctree and b/docs/build/.doctrees/trf.doctree differ diff --git a/docs/build/.doctrees/trnascan.doctree b/docs/build/.doctrees/trnascan.doctree index 5bdfa69..63aa5cb 100644 Binary files a/docs/build/.doctrees/trnascan.doctree and b/docs/build/.doctrees/trnascan.doctree differ diff --git a/docs/build/_modules/ensembl/tools/anno/protein_annotation/genblast.html b/docs/build/_modules/ensembl/tools/anno/protein_annotation/genblast.html index af8efc0..d5fd9b1 100644 --- a/docs/build/_modules/ensembl/tools/anno/protein_annotation/genblast.html +++ b/docs/build/_modules/ensembl/tools/anno/protein_annotation/genblast.html @@ -57,7 +57,6 @@

Source code for ensembl.tools.anno.protein_annotation.genblast

the sequences have undergone significant evolutionary changes. This capability makes it a valuable resource for researchers studying gene evolution, gene families, and gene function across diverse species. - GenBlast has been widely used in various genomic analyses and is available as a standalone command-line tool or as part of different bioinformatics pipelines. Researchers in the field of comparative genomics and gene function analysis @@ -110,17 +109,31 @@

Source code for ensembl.tools.anno.protein_annotation.genblast

) -> None: """ Executes GenBlast on genomic slices - Args: - masked_genome : Masked genome file path. - output_dir: Working directory path. - protein_dataset: Protein dataset (Uniprot/OrthoDb) path. - genblast_timeout_secs: Time for timeout (sec). - max_intron_length: Maximum intron length. - genblast_bin : Software path. - convert2blastmask_bin: Software path. - makeblastdb_bin : Software path. - genblast_timeout: seconds - num_threads: int, number of threads. + :param masked_genome: Masked genome file path. + :type masked_genome: Path + :param output_dir: Working directory path. + :type output_dir: Path + :param protein_dataset: Protein dataset (Uniprot/OrthoDb) path. + :type protein_dataset: Path + :param genblast_timeout_secs: Time for timeout (sec). + :type genblast_timeout_secs: int, default 10800 + :param max_intron_length: Maximum intron length. + :type max_intron_length: int + :param genblast_bin: Software path. + :type genblast_bin: Path, default genblast + :param convert2blastmask_bin: Software path. + :type convert2blastmask_bin: Path, default convert2blastmask + :param makeblastdb_bin: Software path. + :type makeblastdb_bin: Path, default makeblastdb + :param genblast_timeout: seconds + :type genblast_timeout: int, default 1 + :param num_threads: int, number of threads. + :type num_threads: int, default 1 + :param protein_set: Source + :type protein_set: str, one of ["uniprot", "orthodb"] + + :return: None + :rtype: None """ check_exe(genblast_bin) diff --git a/docs/build/_modules/ensembl/tools/anno/repeat_annotation/dust.html b/docs/build/_modules/ensembl/tools/anno/repeat_annotation/dust.html index f5daa3d..2691734 100644 --- a/docs/build/_modules/ensembl/tools/anno/repeat_annotation/dust.html +++ b/docs/build/_modules/ensembl/tools/anno/repeat_annotation/dust.html @@ -95,11 +95,17 @@

Source code for ensembl.tools.anno.repeat_annotation.dust

) -> None: """ Run Dust on genomic slices with mutiprocessing - Args: - genome_file : Genome file path. - output_dir : Working directory path. - dust_bin : Dust software path. - num_threads: Number of threads. + :param genome_file: Genome file path. + :type genome_file: PathLike + :param output_dir: Working directory path. + :type output_dir: Path + :param dust_bin: Dust software path. + :type dust_bin: Path, default dustmasker + :param num_threads: Number of threads. + :type num_threads: int, default 1 + + :return: None + :rtype: None """ check_exe(dust_bin) @@ -113,9 +119,7 @@

Source code for ensembl.tools.anno.repeat_annotation.dust

return logger.info("Creating list of genomic slices") seq_region_to_length = get_seq_region_length(genome_file, 5000) - slice_ids_per_region = get_slice_id( - seq_region_to_length, slice_size=1000000, overlap=0, min_length=5000 - ) + slice_ids_per_region = get_slice_id(seq_region_to_length, slice_size=1000000, overlap=0, min_length=5000) dust_cmd = [dust_bin, "-in"] pool = multiprocessing.Pool(num_threads) # pylint: disable=consider-using-with for slice_id in slice_ids_per_region: @@ -197,8 +201,7 @@

Source code for ensembl.tools.anno.repeat_annotation.dust

start = int(result_match.group(1)) + 1 end = int(result_match.group(2)) + 1 gtf_line = ( - f"{region_name}\tDust\trepeat\t{start}\t" - f'{end}\t.\t+\t.\trepeat_id "{repeat_count}";\n' + f"{region_name}\tDust\trepeat\t{start}\t" f'{end}\t.\t+\t.\trepeat_id "{repeat_count}";\n' ) dust_out.write(gtf_line) repeat_count += 1 @@ -207,20 +210,14 @@

Source code for ensembl.tools.anno.repeat_annotation.dust

class InputSchema(argschema.ArgSchema): """Input arguments expected to run DustMasker.""" - genome_file = argschema.fields.InputFile( - required=True, description="Genome file path" - ) - output_dir = argschema.fields.OutputDir( - required=True, description="Output directory path" - ) + genome_file = argschema.fields.InputFile(required=True, description="Genome file path") + output_dir = argschema.fields.OutputDir(required=True, description="Output directory path") dust_bin = argschema.fields.String( required=False, default="dustmasker", description="Dust executable path", ) - num_threads = argschema.fields.Integer( - required=False, default=1, description="Number of threads" - ) + num_threads = argschema.fields.Integer(required=False, default=1, description="Number of threads") def main() -> None: diff --git a/docs/build/_modules/ensembl/tools/anno/repeat_annotation/red.html b/docs/build/_modules/ensembl/tools/anno/repeat_annotation/red.html index 353ed83..8e4687b 100644 --- a/docs/build/_modules/ensembl/tools/anno/repeat_annotation/red.html +++ b/docs/build/_modules/ensembl/tools/anno/repeat_annotation/red.html @@ -80,14 +80,15 @@

Source code for ensembl.tools.anno.repeat_annotation.red

def run_red(genome_file: Path, output_dir: Path, red_bin: Path = Path("Red"),) -> str: """ Run Red on genome file - - Args: - genome_file : Genome file path. - output_dir : Working directory path. - red_bin : Red software path. - - Return: - masked genome file + :param genome_file: Genome file path. + :type genome_file: Path + :param output_dir: Working directory path. + :type output_dir: Path + :param red_bin: Red software path. + :type red_bin: Path, default Red + + :return: Masked genome file + :rtype: str """ check_exe(red_bin) red_dir = create_dir(output_dir, "red_output") diff --git a/docs/build/_modules/ensembl/tools/anno/repeat_annotation/repeatmasker.html b/docs/build/_modules/ensembl/tools/anno/repeat_annotation/repeatmasker.html index d5834b7..cf92ba6 100644 --- a/docs/build/_modules/ensembl/tools/anno/repeat_annotation/repeatmasker.html +++ b/docs/build/_modules/ensembl/tools/anno/repeat_annotation/repeatmasker.html @@ -96,14 +96,24 @@

Source code for ensembl.tools.anno.repeat_annotation.repeatmasker

""" Executes RepeatMasker on the genome slices and stores the final annotation.gtf in repeatmasker_output - Args: - genome_file : Genome file path. - repeatmasker_path : RepeatMasker executable path. - library : Custom repeat library. - species :Species name. - output_dir : Output directory path. - num_threads: Number of threads. + :param genome_file: Genome file path. + :type genome_file: PathLike + :param output_dir: Output directory path. + :type output_dir: Path + :param repeatmasker_bin: RepeatMasker executable path. + :type repeatmasker_bin: Path, default RepeatMasker + :param library: Custom repeat library. + :type library: str + :param repeatmasker_engine: RepeatMasker engine. + :type repeatmasker_engine: str, default rmblast + :param species: Species name. + :type species: str + :param num_threads: Number of threads. + :type num_threads: int, default 1 + + :return: None + :rtype: None """ check_exe(repeatmasker_bin) repeatmasker_dir = create_dir(output_dir, "repeatmasker_output") diff --git a/docs/build/_modules/ensembl/tools/anno/repeat_annotation/trf.html b/docs/build/_modules/ensembl/tools/anno/repeat_annotation/trf.html index 8cfb683..4f02ecd 100644 --- a/docs/build/_modules/ensembl/tools/anno/repeat_annotation/trf.html +++ b/docs/build/_modules/ensembl/tools/anno/repeat_annotation/trf.html @@ -99,18 +99,31 @@

Source code for ensembl.tools.anno.repeat_annotation.trf

) -> None: """ Executes TRF on genomic slices - Args: - genome_file : Genome file path. - output_dir : working directory path. - num_threads: int, number of threads. - trf_bin : TRF software path. - match_score : Matching weight. - mismatch_score : Mismatching penalty. - delta : Indel penalty. - pm : Match probability (whole number). - pi : Indel probability (whole number). - minscore : Minimum alignment score to report. - maxperiod : Maximum period size to report. + :param genome_file: Genome file path. + :type genome_file: PathLike + :param output_dir: Working directory path. + :type output_dir: Path + :param num_threads: int, number of threads. + :type num_threads: int, default 1 + :param trf_bin: TRF software path. + :type trf_bin: Path, default trf + :param match_score: Matching weight. + :type match_score: int, default 2 + :param mismatch_score: Mismatching penalty. + :type mismatch_score: int, default 5 + :param delta: Indel penalty. + :type delta: int, default 7 + :param pm: Match probability (whole number). + :type pm: int, default 80 + :param pi: Indel probability (whole number). + :type pi: int, default 10 + :param minscore: Minimum alignment score to report. + :type minscore: int, default 40 + :param maxperiod: Maximum period size to report. + :type maxperiod: int, default 500 + + :return: None + :rtype: None """ check_exe(trf_bin) trf_dir = create_dir(output_dir, "trf_output") diff --git a/docs/build/_modules/ensembl/tools/anno/simple_feature_annotation/cpg.html b/docs/build/_modules/ensembl/tools/anno/simple_feature_annotation/cpg.html index be970f5..ff586ac 100644 --- a/docs/build/_modules/ensembl/tools/anno/simple_feature_annotation/cpg.html +++ b/docs/build/_modules/ensembl/tools/anno/simple_feature_annotation/cpg.html @@ -94,14 +94,24 @@

Source code for ensembl.tools.anno.simple_feature_annotation.cpg

) -> None: """ Run CpG islands on genomic slices - Args: - genome_file : Genome file path. - output_dir : Working directory path - cpg_bin : CpG software path. - cpg_min_length : Min length of CpG islands - cpg_min_gc_content : Min GC frequency percentage - cpg_min_oe : Min ratio of the observed to expected number of CpG (CpGo/e) - num_threads: int, number of threads. + + :param genome_file: Genome file path. + :type genome_file: PathLike + :param output_dir: Working directory path + :type output_dir: Path + :param cpg_bin: CpG software path. + :type cpg_bin: Path + :param cpg_min_length: Min length of CpG islands + :type cpg_min_length: int + :param cpg_min_gc_content: Min GC frequency percentage + :type cpg_min_gc_content: int + :param cpg_min_oe: Min ratio of the observed to expected number of CpG (CpGo/e) + :type cpg_min_oe: float + :param num_threads: int, number of threads. + :type num_threads: int + + :return: None + :rtype: None """ check_exe(cpg_bin) diff --git a/docs/build/_modules/ensembl/tools/anno/simple_feature_annotation/eponine.html b/docs/build/_modules/ensembl/tools/anno/simple_feature_annotation/eponine.html index 27a55ba..656690e 100644 --- a/docs/build/_modules/ensembl/tools/anno/simple_feature_annotation/eponine.html +++ b/docs/build/_modules/ensembl/tools/anno/simple_feature_annotation/eponine.html @@ -99,12 +99,19 @@

Source code for ensembl.tools.anno.simple_feature_annotation.eponine

) -> None: """ Run Eponine on genomic slices - Args: - genome_file : Genome file path. - output_dir : Working directory path. - java_bin : Java path. - eponine_bin : Eponine software path - num_threads: Number of threads. + :param genome_file: Genome file path. + :type genome_file: PathLike + :param output_dir: Working directory path. + :type output_dir: Path + :param java_bin: Java path. + :type java_bin: Path, default java + :param eponine_bin: Eponine software path + :type eponine_bin: Path + :param num_threads: Number of threads. + :type num_threads: int, default 1 + + :return: None + :rtype: None """ check_file(eponine_bin) check_exe(java_bin) diff --git a/docs/build/_modules/ensembl/tools/anno/snc_rna_annotation/trnascan.html b/docs/build/_modules/ensembl/tools/anno/snc_rna_annotation/trnascan.html index efdf5b5..ff3e26c 100644 --- a/docs/build/_modules/ensembl/tools/anno/snc_rna_annotation/trnascan.html +++ b/docs/build/_modules/ensembl/tools/anno/snc_rna_annotation/trnascan.html @@ -94,12 +94,19 @@

Source code for ensembl.tools.anno.snc_rna_annotation.trnascan

) -> None: """ Executes tRNAscan-SE on genomic slices - Args: - genome_file : Genome file path. - trnascan_bin : tRNAscan-SE software path. - trnascan_filter : tRNAscan-SE filter set path. - output_dir : working directory path. - num_threads: int, number of threads. + :param genome_file: Genome file path. + :type genome_file: PathLike + :param output_dir: working directory path. + :type output_dir: Path + :param trnascan_bin: tRNAscan-SE software path. + :type trnascan_bin: Path, default tRNAscan-SE + :param trnascan_filter: tRNAscan-SE filter set path. + :type trnascan_filter: Path, default EukHighConfidenceFilter + :param num_threads: int, number of threads. + :type num_threads: int, default 1 + + :return: None + :rtype: None """ check_exe(trnascan_bin) check_file(trnascan_filter) diff --git a/docs/build/_modules/ensembl/tools/anno/transcriptomic_annotation/minimap.html b/docs/build/_modules/ensembl/tools/anno/transcriptomic_annotation/minimap.html index dc55ad4..371a316 100644 --- a/docs/build/_modules/ensembl/tools/anno/transcriptomic_annotation/minimap.html +++ b/docs/build/_modules/ensembl/tools/anno/transcriptomic_annotation/minimap.html @@ -36,7 +36,7 @@

Source code for ensembl.tools.anno.transcriptomic_annotation.minimap

-# See the NOTICE file distributed with this work for additional information
+# See the NOTICE file distributed with this work for additional information #pylint: disable=missing-module-docstring
 # regarding copyright ownership.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -50,7 +50,6 @@ 

Source code for ensembl.tools.anno.transcriptomic_annotation.minimap

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """ Minimap2 is a pairwise sequence alignment algorithm designed for efficiently comparing nucleotide sequences. The algorithm uses a versatile indexing strategy to quickly find approximate matches between sequences, @@ -89,14 +88,23 @@

Source code for ensembl.tools.anno.transcriptomic_annotation.minimap

""" Run Minimap2 to align long read data against genome file. Default Minimap set for PacBio data. - Args: - output_dir : Working directory path. - long_read_fastq_dir : Long read directory path. - genome_file : Genome file path. - minimap2_bin : Software path. - paftools_bin : Software path. - max_intron_length : The maximum intron size for alignments. Defaults to 100000. - num_threads : Number of available threads. + :param output_dir: Working directory path. + :type output_dir: Path + :param long_read_fastq_dir: Long read directory path. + :type long_read_fastq_dir: Path + :param genome_file: Genome file path. + :type genome_file: Path + :param minimap2_bin: Software path. + :type minimap2_bin: Path, default minimap2 + :param paftools_bin: Software path. + :type paftools_bin: Path, default paftools.js + :param max_intron_length: The maximum intron size for alignments. Defaults to 100000. + :type max_intron_length: int, default 100000 + :param num_threads: Number of available threads. + :type num_threads: int, default 1 + + :return: None + :rtype: None """ check_exe(minimap2_bin) check_exe(paftools_bin) diff --git a/docs/build/_modules/ensembl/tools/anno/transcriptomic_annotation/scallop.html b/docs/build/_modules/ensembl/tools/anno/transcriptomic_annotation/scallop.html index 6f09568..b36c5a4 100644 --- a/docs/build/_modules/ensembl/tools/anno/transcriptomic_annotation/scallop.html +++ b/docs/build/_modules/ensembl/tools/anno/transcriptomic_annotation/scallop.html @@ -92,12 +92,19 @@

Source code for ensembl.tools.anno.transcriptomic_annotation.scallop

""" Run Scallop assembler on short read data after STAR alignment. - Args: - output_dir : Working directory path. - scallop_bin : Software path. - prlimit_bin : Software path. - stringtie_bin : Software path. - memory_limit : Memory limit Scallop command Defaults to 40*1024**3. + :param output_dir: Working directory path. + :type output_dir: Path + :param scallop_bin: Software path. + :type scallop_bin: Path, default scallop + :param prlimit_bin: Software path. + :type prlimit_bin: Path, default prlimit + :param stringtie_bin: Software path. + :type stringtie_bin: Path, default stringtie + :param memory_limit: Memory limit Scallop command Defaults to 40*1024**3. + :type memory_limit: int + + :return: None + :rtype: None """ check_exe(scallop_bin) check_exe(stringtie_bin) diff --git a/docs/build/_modules/ensembl/tools/anno/transcriptomic_annotation/star.html b/docs/build/_modules/ensembl/tools/anno/transcriptomic_annotation/star.html index bc215f2..9fa9ac7 100644 --- a/docs/build/_modules/ensembl/tools/anno/transcriptomic_annotation/star.html +++ b/docs/build/_modules/ensembl/tools/anno/transcriptomic_annotation/star.html @@ -100,18 +100,31 @@

Source code for ensembl.tools.anno.transcriptomic_annotation.star

Run STAR alignment on list of short read data. Args: - genome_file : Genome file path. - output_dir : Working directory path. - short_read_fastq_dir : Short read directory path. - delete_pre_trim_fastq : Delete the original fastq files after trimming. Defaults to False. - trim_fastq : Trim short read files using TrimGalore. Defaults to False. - max_reads_per_sample : Max number of reads per sample. Defaults to 0 (unlimited). - max_intron_length : The maximum intron size for alignments. Defaults to 100000. - num_threads : Number of available threads. - star_bin : Software path. - samtools_bin : Software path. - trim_galore_bin : Software path. - + :param genome_file: Genome file path. + :type genome_file: Path + :param output_dir: Working directory path. + :type output_dir: Path + :param short_read_fastq_dir: Short read directory path. + :type short_read_fastq_dir: Path + :param delete_pre_trim_fastq: Delete the original fastq files after trimming. Defaults to False. + :type delete_pre_trim_fastq: boolean, default False + :param trim_fastq: Trim short read files using TrimGalore. Defaults to False. + :type trim_fastq: boolean, default False + :param max_reads_per_sample: Max number of reads per sample. Defaults to 0 (unlimited). + :type max_reads_per_sample: int, default 0 + :param max_intron_length: The maximum intron size for alignments. Defaults to 100000. + :type max_intron_length: int, default 100000 + :param num_threads: Number of available threads. + :type num_threads: int, default 1 + :param star_bin: Software path. + :type star_bin: Path, default star + :param samtools_bin: Software path. + :type samtools_bin: Path,default samtools + :param trim_galore_bin: Software path. 
+ :type trim_galore_bin: Path, default trim_galore + + :return: None + :rtype: None """ check_exe(star_bin) # If trimming has been enabled then switch the path for diff --git a/docs/build/_modules/ensembl/tools/anno/transcriptomic_annotation/stringtie.html b/docs/build/_modules/ensembl/tools/anno/transcriptomic_annotation/stringtie.html index b951590..9adde40 100644 --- a/docs/build/_modules/ensembl/tools/anno/transcriptomic_annotation/stringtie.html +++ b/docs/build/_modules/ensembl/tools/anno/transcriptomic_annotation/stringtie.html @@ -83,10 +83,15 @@

Source code for ensembl.tools.anno.transcriptomic_annotation.stringtie

< ) -> None: """ StringTie assembler of short read data. - Args: - output_dir : Working directory path. - stringtie_bin : Software path. - num_threads : Number of available threads. + :param output_dir: Working directory path. + :type output_dir: Path + :param stringtie_bin: Software path. + :type stringtie_bin: Path, default stringtie + :param num_threads: Number of available threads. + :type num_threads: int, default 1 + + :return: None + :rtype: None """ check_exe(stringtie_bin) stringtie_dir = create_dir(output_dir, "stringtie_output") diff --git a/docs/build/_sources/index.rst.txt b/docs/build/_sources/index.rst.txt index 6e24e41..add9008 100644 --- a/docs/build/_sources/index.rst.txt +++ b/docs/build/_sources/index.rst.txt @@ -20,7 +20,7 @@ ======================================== Ensembl-anno -=========================================== +======================================== Anno tool kit diff --git a/docs/build/cpg.html b/docs/build/cpg.html index 7afe15e..390af36 100644 --- a/docs/build/cpg.html +++ b/docs/build/cpg.html @@ -51,14 +51,59 @@
ensembl.tools.anno.simple_feature_annotation.cpg.run_cpg(genome_file: PathLike, output_dir: Path, cpg_bin: Path = PosixPath('cpg_lh'), cpg_min_length: int = 400, cpg_min_gc_content: int = 50, cpg_min_oe: float = 0.6, num_threads: int = 1) None[source]
-

Run CpG islands on genomic slices -:param genome_file: Genome file path. -:param output_dir: Working directory path -:param cpg_bin: CpG software path. -:param cpg_min_length: Min length of CpG islands -:param cpg_min_gc_content: Min GC frequency percentage -:param cpg_min_oe: Min ratio of the observed to expected number of CpG (CpGo/e) -:param num_threads: int, number of threads.

+

Run CpG islands on genomic slices

+
+
+
param genome_file:
+

Genome file path.

+
+
type genome_file:
+

PathLike

+
+
param output_dir:
+

Working directory path

+
+
type output_dir:
+

Path

+
+
param cpg_bin:
+

CpG software path.

+
+
type cpg_bin:
+

Path

+
+
param cpg_min_length:
+

Min length of CpG islands

+
+
type cpg_min_length:
+

int

+
+
param cpg_min_gc_content:
+

Min GC frequency percentage

+
+
type cpg_min_gc_content:
+

int

+
+
param cpg_min_oe:
+

Min ratio of the observed to expected number of CpG (CpGo/e)

+
+
type cpg_min_oe:
+

float

+
+
param num_threads:
+

int, number of threads.

+
+
type num_threads:
+

int

+
+
return:
+

None

+
+
rtype:
+

None

+
+
+
diff --git a/docs/build/dust.html b/docs/build/dust.html index 68adb79..ac6ee27 100644 --- a/docs/build/dust.html +++ b/docs/build/dust.html @@ -51,11 +51,41 @@
ensembl.tools.anno.repeat_annotation.dust.run_dust(genome_file: PathLike, output_dir: Path, dust_bin: Path = PosixPath('dustmasker'), num_threads: int = 1) None[source]
-

Run Dust on genomic slices with mutiprocessing -:param genome_file: Genome file path. -:param output_dir: Working directory path. -:param dust_bin: Dust software path. -:param num_threads: Number of threads.

+
+
Run Dust on genomic slices with multiprocessing
+
param genome_file:
+

Genome file path.

+
+
type genome_file:
+

PathLike

+
+
param output_dir:
+

Working directory path.

+
+
type output_dir:
+

Path

+
+
param dust_bin:
+

Dust software path.

+
+
type dust_bin:
+

Path, default dustmasker

+
+
param num_threads:
+

Number of threads.

+
+
type num_threads:
+

int, default 1

+
+
return:
+

None

+
+
rtype:
+

None

+
+
+
+
@@ -74,10 +104,7 @@

Table of Contents

  • API Setup and installation
  • License
  • CpG Module Documentation
  • -
  • DustMasker Module Documentation -
  • +
  • DustMasker Module Documentation
  • Eponine Module Documentation
  • Genblast Module Documentation
  • Minimap2 Module Documentation
  • diff --git a/docs/build/eponine.html b/docs/build/eponine.html index d0fefe0..3e90c10 100644 --- a/docs/build/eponine.html +++ b/docs/build/eponine.html @@ -52,12 +52,47 @@
    ensembl.tools.anno.simple_feature_annotation.eponine.run_eponine(genome_file: PathLike, output_dir: Path, num_threads: int = 1, java_bin: Path = PosixPath('java'), eponine_bin: Path = PosixPath('/hps/software/users/ensembl/ensw/C8-MAR21-sandybridge/linuxbrew/opt/eponine/libexec/eponine-scan.jar'), eponine_threshold: float = 0.999) None[source]
    -

    Run Eponine on genomic slices -:param genome_file: Genome file path. -:param output_dir: Working directory path. -:param java_bin: Java path. -:param eponine_bin: Eponine software path -:param num_threads: Number of threads.

    +
    +
    Run Eponine on genomic slices
    +
    param genome_file:
    +

    Genome file path.

    +
    +
    type genome_file:
    +

    PathLike

    +
    +
    param output_dir:
    +

    Working directory path.

    +
    +
    type output_dir:
    +

    Path

    +
    +
    param java_bin:
    +

    Java path.

    +
    +
    type java_bin:
    +

    Path, default java

    +
    +
    param eponine_bin:
    +

    Eponine software path

    +
    +
    type eponine_bin:
    +

    Path

    +
    +
    param num_threads:
    +

    Number of threads.

    +
    +
    type num_threads:
    +

    int, default 1

    +
    +
    return:
    +

    None

    +
    +
    rtype:
    +

    None

    +
    +
    +
    +
    @@ -77,10 +112,7 @@

    Table of Contents

  • License
  • CpG Module Documentation
  • DustMasker Module Documentation
  • -
  • Eponine Module Documentation -
  • +
  • Eponine Module Documentation
  • Genblast Module Documentation
  • Minimap2 Module Documentation
  • Red Module Documentation
  • diff --git a/docs/build/genblast.html b/docs/build/genblast.html index 67417d2..6e101eb 100644 --- a/docs/build/genblast.html +++ b/docs/build/genblast.html @@ -49,8 +49,8 @@ comparative genomics tasks and accurately identify homologs even when the sequences have undergone significant evolutionary changes. This capability makes it a valuable resource for researchers studying gene -evolution, gene families, and gene function across diverse species.

    -

    GenBlast has been widely used in various genomic analyses and is available as +evolution, gene families, and gene function across diverse species. +GenBlast has been widely used in various genomic analyses and is available as a standalone command-line tool or as part of different bioinformatics pipelines. Researchers in the field of comparative genomics and gene function analysis often rely on GenBlast to perform sensitive homology searches and obtain @@ -62,17 +62,79 @@

    ensembl.tools.anno.protein_annotation.genblast.run_genblast(masked_genome: Path, output_dir: Path, protein_dataset: Path, max_intron_length: int, genblast_timeout_secs: int = 10800, genblast_bin: Path = PosixPath('genblast'), convert2blastmask_bin: Path = PosixPath('convert2blastmask'), makeblastdb_bin: Path = PosixPath('makeblastdb'), num_threads: int = 1, protein_set: str = ['uniprot', 'orthodb']) None[source]
    -

    Executes GenBlast on genomic slices -:param masked_genome: Masked genome file path. -:param output_dir: Working directory path. -:param protein_dataset: Protein dataset (Uniprot/OrthoDb) path. -:param genblast_timeout_secs: Time for timeout (sec). -:param max_intron_length: Maximum intron length. -:param genblast_bin: Software path. -:param convert2blastmask_bin: Software path. -:param makeblastdb_bin: Software path. -:param genblast_timeout: seconds -:param num_threads: int, number of threads.

    +
    +
    Executes GenBlast on genomic slices
    +
    param masked_genome:
    +

    Masked genome file path.

    +
    +
    type masked_genome:
    +

    Path

    +
    +
    param output_dir:
    +

    Working directory path.

    +
    +
    type output_dir:
    +

    Path

    +
    +
    param protein_dataset:
    +

    Protein dataset (Uniprot/OrthoDb) path.

    +
    +
    type protein_dataset:
    +

    Path

    +
    +
    param genblast_timeout_secs:
    +

    Time for timeout (sec).

    +
    +
    type genblast_timeout_secs:
    +

    int, default 10800

    +
    +
    param max_intron_length:
    +

    Maximum intron length.

    +
    +
    type max_intron_length:
    +

    int

    +
    +
    param genblast_bin:
    +

    Software path.

    +
    +
    type genblast_bin:
    +

    Path, default genblast

    +
    +
    param convert2blastmask_bin:
    +

    Software path.

    +
    +
    type convert2blastmask_bin:
    +

    Path, default convert2blastmask

    +
    +
    param makeblastdb_bin:
    +

    Software path.

    +
    +
    type makeblastdb_bin:
    +

    Path, default makeblastdb

    +
    +
    param genblast_timeout:
    +

    seconds

    +
    +
    type genblast_timeout:
    +

    int, default 1

    +
    +
    param num_threads:
    +

    int, number of threads.

    +
    +
    +

    :type num_threads:int, default 1 +:param protein_set: Source +:type str: [“uniprot”, “orthodb”]

    +
    +
    return:
    +

    None

    +
    +
    rtype:
    +

    None

    +
    +
    +
    +
    @@ -93,10 +155,7 @@

    Table of Contents

  • CpG Module Documentation
  • DustMasker Module Documentation
  • Eponine Module Documentation
  • -
  • Genblast Module Documentation -
  • +
  • Genblast Module Documentation
  • Minimap2 Module Documentation
  • Red Module Documentation
  • Repeatmasker Module Documentation
  • diff --git a/docs/build/index.html b/docs/build/index.html index 0e1cabe..9f11b08 100644 --- a/docs/build/index.html +++ b/docs/build/index.html @@ -5,7 +5,7 @@ - Contents — ensembl-anno 0.1 documentation + Ensembl-anno — ensembl-anno 0.1 documentation @@ -39,9 +39,11 @@
    -

    Anno tool kit

    +
    +

    Ensembl-anno

    +

    Anno tool kit

    -

    Contents

    +

    Contents

    Check out installation section for further information on how to install the project.

    @@ -64,13 +66,14 @@

    Contents

    -

    Indices and tables

    +

    Indices and tables

    +
    diff --git a/docs/build/install.html b/docs/build/install.html index 6d87929..b2cf6cf 100644 --- a/docs/build/install.html +++ b/docs/build/install.html @@ -15,14 +15,14 @@ - +