Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MISC] lint #146

Merged
merged 2 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
# SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik
# SPDX-License-Identifier: CC0-1.0

# Format all files in include/test folder, including std module, excluding contrib module
# find . \( -iname "*.cpp" -or -iname "*.hpp" \) -and -not -path "./lib/*" -and -not -path "./build/*" | xargs clang-format-18 --style=file -i
# Format all files in include/test folder
# find . \( -iname "*.cpp" -or -iname "*.hpp" \) -and -not -path "./build/*" | xargs clang-format-18 --style=file -i
# Staged files: git diff --name-only HEAD --diff-filter=ACMRT | grep -E "(\.cpp|\.hpp)$" | xargs clang-format-18 --style=file -i
---
Language: Cpp
Expand Down
12 changes: 7 additions & 5 deletions .cmake-format.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,20 @@
# SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik
# SPDX-License-Identifier: CC0-1.0

# find . \( -iname CMakeLists.txt -o -iname *.cmake \) -a -not -path "./lib/*" -a -not -path "./build/*" | xargs cmake-format -c .cmake-format.yaml -i
# find . \( -iname CMakeLists.txt -o -iname *.cmake \) -a -not -path "./build/*" | xargs cmake-format -c .cmake-format.yaml -i
_help_parse: Options affecting listfile parsing
parse:
_help_additional_commands:
- Specify structure for custom cmake functions
additional_commands:
declare_internal_datasource:
declare_datasource:
pargs:
nargs: '*'
flags: []
kwargs:
FILE: '*'
FILE: 1
URL: '*'
URL_HASH: '*'
CONFIGURE: '*'
URL_HASH: 1
cpmgetpackage:
pargs: 1
spelling: CPMGetPackage
Expand Down
27 changes: 13 additions & 14 deletions cmake/CPM.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,22 @@
#
# SPDX-FileCopyrightText: Copyright (c) 2019-2023 Lars Melchior and contributors

set(CPM_DOWNLOAD_VERSION 0.40.2)
set(CPM_HASH_SUM "c8cdc32c03816538ce22781ed72964dc864b2a34a310d3b7104812a5ca2d835d")
set (CPM_DOWNLOAD_VERSION 0.40.2)
set (CPM_HASH_SUM "c8cdc32c03816538ce22781ed72964dc864b2a34a310d3b7104812a5ca2d835d")

if(CPM_SOURCE_CACHE)
set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
elseif(DEFINED ENV{CPM_SOURCE_CACHE})
set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
else()
set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
endif()
if (CPM_SOURCE_CACHE)
set (CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
elseif (DEFINED ENV{CPM_SOURCE_CACHE})
set (CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
else ()
set (CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
endif ()

# Expand relative path. This is important if the provided path contains a tilde (~)
get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE)
get_filename_component (CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE)

file(DOWNLOAD
https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake
${CPM_DOWNLOAD_LOCATION} EXPECTED_HASH SHA256=${CPM_HASH_SUM}
file (DOWNLOAD https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake
${CPM_DOWNLOAD_LOCATION} EXPECTED_HASH SHA256=${CPM_HASH_SUM}
)

include(${CPM_DOWNLOAD_LOCATION})
include (${CPM_DOWNLOAD_LOCATION})
5 changes: 2 additions & 3 deletions cmake/test/config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ if (NOT TARGET ${PROJECT_NAME}_test)
add_library (${PROJECT_NAME}_test INTERFACE)
target_compile_options (${PROJECT_NAME}_lib PUBLIC "-pedantic" "-Wall" "-Wextra" "-Werror")


if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
# Disable bogus warnings in GCC12.
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12 AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13)
Expand All @@ -36,8 +35,8 @@ if (NOT TARGET ${PROJECT_NAME}_test)

# !Workaround: Get seqan3 test include dir from seqan3 target
find_path (SEQAN3_TEST_INCLUDE_DIR
NAMES seqan3/test/tmp_directory.hpp
HINTS "${seqan3_SOURCE_DIR}/test/include"
NAMES seqan3/test/tmp_directory.hpp
HINTS "${seqan3_SOURCE_DIR}/test/include"
)
target_include_directories (${PROJECT_NAME}_test SYSTEM INTERFACE "${SEQAN3_TEST_INCLUDE_DIR}")

Expand Down
3 changes: 2 additions & 1 deletion doc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ if (${DOXYGEN_FOUND})
COMMAND ${DOXYGEN_EXECUTABLE}
WORKING_DIRECTORY ${APP_TEMPLATE_DOXYGEN_OUTPUT_DIR}
COMMENT "Generating (developer) API documentation with Doxygen."
VERBATIM)
VERBATIM
)
message (STATUS "${FontBold}You can run `make doc` to build api documentation.${FontReset}")
else ()
message (STATUS "Doxygen not found.")
Expand Down
2 changes: 1 addition & 1 deletion include/estimate.h → include/estimate.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

#include <filesystem>

#include "shared.h"
#include "shared.hpp"

/*!\brief The arguments necessary for a search.
* \param std::filesystem::path search_file The sequence file containing the transcripts to be searched for.
Expand Down
77 changes: 50 additions & 27 deletions include/ibf.h → include/ibf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,37 +7,40 @@

#pragma once

#include <filesystem>
#include <iostream>
#include <math.h>
#include <numeric>
#include <string>

#include <seqan3/alphabet/container/concatenated_sequences.hpp>
#include <seqan3/alphabet/nucleotide/dna4.hpp>
#include <filesystem>

#include "shared.h"
#include "shared.hpp"

struct minimiser_arguments
{
std::filesystem::path include_file; // Needs to be defined when only minimisers appearing in this file should be stored
std::filesystem::path exclude_file; // Needs to be defined when minimisers appearing in this file should NOT be stored
std::vector<int> samples{}; // Can be used to indicate that sequence files belong to the same experiment
bool paired = false; // If true, than experiments are seen as paired-end experiments
std::filesystem::path
include_file; // Needs to be defined when only minimisers appearing in this file should be stored
std::filesystem::path
exclude_file; // Needs to be defined when minimisers appearing in this file should NOT be stored
std::vector<int> samples{}; // Can be used to indicate that sequence files belong to the same experiment
bool paired = false; // If true, then experiments are seen as paired-end experiments
bool experiment_names = false; // Flag, if names of experiment should be stored in a txt file
bool ram_friendly = false;
};

//!\brief Generates a random integer strictly less than a given (positive) maximum
struct RandomGenerator {
int maxi;
RandomGenerator(int max) :
maxi(max) {
}

int operator()() {
return rand() % maxi;
}
struct RandomGenerator
{
    // Modulus applied to every number drawn; must be non-zero, since it is used as a divisor.
    int maxi;

    // Remembers `max` as the modulus for all later draws.
    RandomGenerator(int max) : maxi{max}
    {}

    // Takes the next value from rand() and reduces it modulo `maxi`.
    // For a positive `maxi` the result lies in [0, maxi).
    int operator()()
    {
        int const drawn = rand();
        return drawn % maxi;
    }
};

/*!\brief Get the concrete expression values (= median of all counts of one transcript) for given experiments.
Expand All @@ -48,8 +51,11 @@ struct RandomGenerator {
* \param genome_file A "*.genome" file constructed with the command genome.
* \param paired Flag to indicate if input data is paired or not.
*/
void count(min_arguments const & args, std::vector<std::filesystem::path> sequence_files, std::filesystem::path include_file,
std::filesystem::path genome_file, bool paired);
void count(min_arguments const & args,
std::vector<std::filesystem::path> sequence_files,
std::filesystem::path include_file,
std::filesystem::path genome_file,
bool paired);

/*!\brief Creates a set of minimizers to ignore, which should be used as an input to count.
* \param args The minimiser arguments to use (seed, shape, window size).
Expand All @@ -71,7 +77,10 @@ void read_binary(std::filesystem::path filename, robin_hood::unordered_node_map<
* \param num_of_minimisers Variable, where the number of minimisers should be stored.
* \param cutoff cutoff value.
*/
void read_binary_start(min_arguments & args, std::filesystem::path filename, uint64_t & num_of_minimisers, uint8_t & cutoff);
void read_binary_start(min_arguments & args,
std::filesystem::path filename,
uint64_t & num_of_minimisers,
uint8_t & cutoff);

/*! \brief Creates IBFs.
* \param sequence_files A vector of sequence file paths.
Expand All @@ -85,8 +94,11 @@ void read_binary_start(min_arguments & args, std::filesystem::path filename, uin
* \param num_hash The number of hash functions to use.
* \returns The expression thresholds per experiment.
*/
std::vector<uint16_t> ibf(std::vector<std::filesystem::path> const & sequence_files, estimate_ibf_arguments & ibf_args,
minimiser_arguments & minimiser_args, std::vector<double> & fpr, std::vector<uint8_t> & cutoffs,
std::vector<uint16_t> ibf(std::vector<std::filesystem::path> const & sequence_files,
estimate_ibf_arguments & ibf_args,
minimiser_arguments & minimiser_args,
std::vector<double> & fpr,
std::vector<uint8_t> & cutoffs,
std::filesystem::path const expression_by_genome_file = "",
size_t num_hash = 1);

Expand All @@ -101,7 +113,8 @@ std::vector<uint16_t> ibf(std::vector<std::filesystem::path> const & sequence_fi
* \returns The expression thresholds per experiment.
*/
std::vector<uint16_t> ibf(std::vector<std::filesystem::path> const & minimiser_files,
estimate_ibf_arguments & ibf_args, std::vector<double> & fpr,
estimate_ibf_arguments & ibf_args,
std::vector<double> & fpr,
std::filesystem::path const expression_by_genome_file = "",
size_t num_hash = 1);

Expand All @@ -111,8 +124,10 @@ std::vector<uint16_t> ibf(std::vector<std::filesystem::path> const & minimiser_f
* \param minimiser_args The minimiser specific arguments to use.
* \param cutoffs List of cutoffs.
*/
void minimiser(std::vector<std::filesystem::path> const & sequence_files, min_arguments const & args,
minimiser_arguments & minimiser_args, std::vector<uint8_t> & cutoffs);
void minimiser(std::vector<std::filesystem::path> const & sequence_files,
min_arguments const & args,
minimiser_arguments & minimiser_args,
std::vector<uint8_t> & cutoffs);

/*! \brief Insert into IBFs.
* \param sequence_files A vector of sequence file paths.
Expand All @@ -127,9 +142,12 @@ void minimiser(std::vector<std::filesystem::path> const & sequence_files, min_ar
* \returns The expression thresholds per experiment.
*/
std::vector<uint16_t> insert(std::vector<std::filesystem::path> const & sequence_files,
estimate_ibf_arguments & ibf_args, minimiser_arguments & minimiser_args,
estimate_ibf_arguments & ibf_args,
minimiser_arguments & minimiser_args,
std::vector<uint8_t> & cutoffs,
std::filesystem::path const expression_by_genome_file, std::filesystem::path path_in, bool samplewise);
std::filesystem::path const expression_by_genome_file,
std::filesystem::path path_in,
bool samplewise);

/*! \brief Insert into IBFs based on the minimiser files
* \param minimiser_files A vector of minimiser file paths.
Expand All @@ -143,7 +161,9 @@ std::vector<uint16_t> insert(std::vector<std::filesystem::path> const & sequence
*/
std::vector<uint16_t> insert(std::vector<std::filesystem::path> const & minimiser_files,
estimate_ibf_arguments & ibf_args,
std::filesystem::path const expression_by_genome_file, std::filesystem::path path_in, bool samplewise);
std::filesystem::path const expression_by_genome_file,
std::filesystem::path path_in,
bool samplewise);

/*! \brief Delete bins from ibfs
* \param delete_files A vector of integers specifying the bins to delete.
Expand All @@ -152,4 +172,7 @@ std::vector<uint16_t> insert(std::vector<std::filesystem::path> const & minimise
* \param path_in Input directory.
* \param samplewise True, if expression levels were set beforehand.
*/
void delete_bin(std::vector<uint64_t> const & delete_files, estimate_ibf_arguments & ibf_args, std::filesystem::path path_in, bool samplewise);
void delete_bin(std::vector<uint64_t> const & delete_files,
estimate_ibf_arguments & ibf_args,
std::filesystem::path path_in,
bool samplewise);
14 changes: 7 additions & 7 deletions include/shared.h → include/shared.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
#include <seqan3/search/kmer_index/shape.hpp>
#include <seqan3/search/views/minimiser_hash.hpp>

inline constexpr static uint64_t adjust_seed(uint8_t const kmer_size, uint64_t const seed = 0x8F3F73B5CF1C9ADEULL) noexcept
/*!\brief Shifts a 64-bit seed to the right so that only its upper `2 * kmer_size` bits remain.
 * \param kmer_size The k-mer size; two bits are kept per k-mer position.
 * \param seed      The 64-bit seed to adjust.
 * \returns `seed >> (64 - 2 * kmer_size)` for `kmer_size` in [1, 32].
 *          Returns 0 for `kmer_size == 0` and the unmodified `seed` for `kmer_size >= 32`.
 * \details The boundary cases are handled explicitly because shifting a 64-bit value by 64 or more bits
 *          (which the plain formula would do for `kmer_size == 0` or `kmer_size > 32`) is undefined behaviour.
 */
static inline constexpr uint64_t adjust_seed(uint8_t const kmer_size,
                                             uint64_t const seed = 0x8F'3F'73'B5'CF'1C'9A'DEULL) noexcept
{
    // No bits requested: the mathematically expected result of shifting everything out is 0.
    if (kmer_size == 0u)
        return 0u;

    // 2 * kmer_size >= 64: all 64 bits are kept; avoids a wrapped-around (huge) shift count.
    if (kmer_size >= 32u)
        return seed;

    return seed >> (64u - 2u * kmer_size);
}
Expand All @@ -31,7 +32,7 @@ struct all_arguments
struct min_arguments : all_arguments
{
uint8_t k{20};
seqan3::seed s{0x8F3F73B5CF1C9ADEULL};
seqan3::seed s{0x8F'3F'73'B5'CF'1C'9A'DEULL};
seqan3::shape shape = seqan3::ungapped{k};
seqan3::window_size w_size{60};
};
Expand All @@ -41,10 +42,10 @@ struct estimate_ibf_arguments : min_arguments
{
bool compressed = false;
std::vector<uint16_t> expression_thresholds{}; // Expression levels which should be created
uint8_t number_expression_thresholds{}; // If set, the expression levels are determined by the program.
uint8_t number_expression_thresholds{}; // If set, the expression levels are determined by the program.
bool samplewise{false};

template<class Archive>
template <class Archive>
void save(Archive & archive) const
{
archive(k);
Expand All @@ -57,7 +58,7 @@ struct estimate_ibf_arguments : min_arguments
archive(samplewise);
}

template<class Archive>
template <class Archive>
void load(Archive & archive)
{
archive(k);
Expand Down Expand Up @@ -119,8 +120,7 @@ void load_ibf(IBFType & ibf, std::filesystem::path ipath)
* \param opath Path, where the IBF should be stored.
*/
template <class IBFType>
void store_ibf(IBFType const & ibf,
std::filesystem::path opath)
void store_ibf(IBFType const & ibf, std::filesystem::path opath)
{
std::ofstream os{opath, std::ios::binary};
cereal::BinaryOutputArchive oarchive{os};
Expand Down
6 changes: 3 additions & 3 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
cmake_minimum_required (VERSION 3.25)

find_package(OpenMP REQUIRED COMPONENTS CXX)
add_library ("${PROJECT_NAME}_lib" STATIC ibf.cpp estimate.cpp)
find_package (OpenMP REQUIRED COMPONENTS CXX)
add_library ("${PROJECT_NAME}_lib" STATIC estimate.cpp ibf.cpp)
target_link_libraries ("${PROJECT_NAME}_lib" PUBLIC seqan3::seqan3)
target_link_libraries ("${PROJECT_NAME}_lib" PUBLIC robin_hood::robin_hood)
target_link_libraries("${PROJECT_NAME}_lib" PUBLIC OpenMP::OpenMP_CXX)
target_link_libraries ("${PROJECT_NAME}_lib" PUBLIC OpenMP::OpenMP_CXX)
target_include_directories ("${PROJECT_NAME}_lib" PUBLIC ../include)

add_executable ("${PROJECT_NAME}" main.cpp)
Expand Down
Loading