#ifndef BIFROST_COMPACTED_DBG_HPP
#define BIFROST_COMPACTED_DBG_HPP

#include <stddef.h>
#include <limits>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <cstdio>
#include <climits>
#include <functional>
#include <getopt.h>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <stdint.h>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include <thread>
#include <atomic>
#include <mutex>

#include "BlockedBloomFilter.hpp"
#include "Common.hpp"
#include "File_Parser.hpp"
#include "FASTX_Parser.hpp"
#include "GFA_Parser.hpp"
#include "Kmer.hpp"
#include "KmerCovIndex.hpp"
#include "KmerHashTable.hpp"
#include "KmerIterator.hpp"
#include "KmerStream.hpp"
#include "Lock.hpp"
#include "minHashIterator.hpp"
#include "MinimizerIndex.hpp"
#include "RepHash.hpp"
#include "TinyVector.hpp"
#include "Unitig.hpp"
#include "UnitigIterator.hpp"
#include "UnitigMap.hpp"

#include "roaring.hh"

// Bit masks and sentinel value. Only the literal bit patterns are described here.
// NOTE(review): the names suggest a packed "contig id / type / position" layout, but the
// code using these macros is not visible in this part of the file -- confirm against it.
// Selects the upper 32 bits (bits 32..63) of a 64-bit value.
#define MASK_CONTIG_ID (0xffffffff00000000)
// Selects bit 31 only.
#define MASK_CONTIG_TYPE (0x80000000)
// Selects the lower 31 bits (bits 0..30).
#define MASK_CONTIG_POS (0x7fffffff)
// 32-bit value with all bits set.
#define RESERVED_ID (0xffffffff)

// Default k-mer length: default value of CDBG_Build_opt::k and of the kmer_length
// parameter of the CompactedDBG constructor.
#define DEFAULT_K 31

// NOTE(review): not referenced in this part of the file; presumably decrements used to
// derive a default minimizer length g from k -- confirm where they are used.
#define DEFAULT_G_DEC1 8
#define DEFAULT_G_DEC2 4

/** @file src/CompactedDBG.hpp
* Interface for the Compacted de Bruijn graph API.
* Code snippets using this interface are provided in snippets/test.cpp.
*/

using namespace std;

/** @struct CDBG_Build_opt
* @brief Most members of this structure are parameters for CompactedDBG<U, G>::build(), except for:
* - CDBG_Build_opt::k and CDBG_Build_opt::g as they are parameters of the graph constructor.
* - CDBG_Build_opt::clipTips, CDBG_Build_opt::deleteIsolated and CDBG_Build_opt::useMercyKmers are used
* by CompactedDBG<U, G>::simplify
* - CDBG_Build_opt::prefixFilenameOut and CDBG_Build_opt::outputGFA are used by CompactedDBG<U, G>::write
* - CDBG_Build_opt::filename_graph_in is used by CompactedDBG<U, G>::read
* - CDBG_Build_opt::build and CDBG_Build_opt::update are not used by CompactedDBG<U, G> but by the Bifrost CLI.
* Most parameters have default values.
* An example of using such a structure is shown in src/Bifrost.cpp.
* @var CDBG_Build_opt::verbose
* Print information messages during execution if true. Default is false.
* @var CDBG_Build_opt::nb_threads
* Number of threads to use for building the graph. Default is 1.
* @var CDBG_Build_opt::nb_bits_kmers_bf
* Number of Bloom filter bits per k-mer occurring in the FASTA/FASTQ/GFA files of
* CDBG_Build_opt::filename_seq_in and CDBG_Build_opt::filename_ref_in. Default is 14.
* @var CDBG_Build_opt::prefixFilenameOut
* Prefix for the name of the file to which the graph must be written. Mandatory parameter.
* @var CDBG_Build_opt::inFilenameBBF
* String containing the name of a Bloom filter file that is generated by CompactedDBG<U, G>::filter.
* If empty, CompactedDBG<U, G>::filter is called. Otherwise, the Bloom filter is loaded from this file
* and CompactedDBG<U, G>::filter is not called. Note that you need such a Bloom filter even in reference
* mode. Default is empty string (no input file).
* @var CDBG_Build_opt::outFilenameBBF
* String containing the name of a Bloom filter file that will be generated by CompactedDBG<U, G>::filter.
* If empty, the file is not created. Otherwise, the Bloom filter is written to this file. Default is
* empty string (no output file).
* @var CDBG_Build_opt::filename_seq_in
* Vector of strings, each string is the name of a FASTA/FASTQ/GFA file to use for the graph construction.
* Each such file will be filtered before construction such that k-mers with exactly one occurrence in
* those files will be discarded. Mandatory parameter.
* @var CDBG_Build_opt::filename_ref_in
* Vector of strings, each string is the name of a FASTA/FASTQ/GFA file to use for the graph construction.
* Each such file will NOT be filtered before construction such that all k-mers in those files will be used.
* Mandatory parameter.
* @var CDBG_Build_opt::k
* Length of k-mers (not used by CompactedDBG<U, G>::build). Default is 31.
* @var CDBG_Build_opt::g
* Length of g-mers, the minimizers, such that g < k (not used by CompactedDBG<U, G>::build).
* Default is -1, the same default as the minimizer_length parameter of the CompactedDBG constructor.
* @var CDBG_Build_opt::build
* Boolean indicating if the graph must be built. This parameter is not used by any function of
* CompactedDBG<U, G> but is used by the Bifrost CLI. Default is false.
* @var CDBG_Build_opt::update
* Boolean indicating if the graph must be updated. This parameter is not used by any function of
* CompactedDBG<U, G> but is used by the Bifrost CLI. Default is false.
* @var CDBG_Build_opt::clipTips
* Clip short tips (length < 2k) of the graph (not used by CompactedDBG<U, G>::build). Default is false.
* @var CDBG_Build_opt::deleteIsolated
* Remove short isolated unitigs (length < 2k) of the graph (not used by CompactedDBG<U, G>::build).
* Default is false.
* @var CDBG_Build_opt::useMercyKmers
* Keep in the graph low coverage k-mers (cov=1) connecting tips of the graph. Default is false.
* @var CDBG_Build_opt::filename_graph_in
* String containing the name of a GFA file to read using CompactedDBG<U, G>::read. Default is empty
* string (no input file).
* @var CDBG_Build_opt::outputGFA
* Boolean indicating if the graph is written to a GFA file. Default is true.
* @var CDBG_Build_opt::outputFASTA
* Boolean indicating if the graph is written to a FASTA file. Default is false.
* @var CDBG_Build_opt::outputBFG
* Boolean indicating if the graph is written to a BFG/BFI file. Default is false.
*/
struct CDBG_Build_opt {

    bool verbose; // Print information messages during execution. Default: false.

    size_t nb_threads; // Number of threads to use for building the graph. Default: 1.

    // Default: 1. NOTE(review): presumably the minimum number of occurrences required to
    // keep a k-mer -- its consumer is not visible in this part of the file, confirm.
    size_t min_count_km;

    size_t nb_bits_kmers_bf; // Number of Bloom filter bits per k-mer. Default: 14.

    std::string inFilenameBBF; // Bloom filter file to load instead of filtering. Default: empty (no input file).
    std::string outFilenameBBF; // Bloom filter file to generate when filtering. Default: empty (no output file).

    std::vector<std::string> filename_seq_in; // FASTA/FASTQ/GFA input files, filtered before construction.
    std::vector<std::string> filename_ref_in; // FASTA/FASTQ/GFA input files, NOT filtered (all k-mers are used).

    // None of the members below is read by CompactedDBG<U, G>::build. They exist
    // so that the same structure can carry the parameters of other functions such
    // as CompactedDBG<U, G>::simplify, CompactedDBG<U, G>::read or
    // CompactedDBG<U, G>::write.

    int k; // Length of k-mers (graph constructor parameter). Default: DEFAULT_K.
    int g; // Length of minimizers, g < k (graph constructor parameter). Default: -1.

    bool build; // Graph must be built (used by the Bifrost CLI). Default: false.
    bool update; // Graph must be updated (used by the Bifrost CLI). Default: false.
    bool query; // Default: false. NOTE(review): presumably a CLI mode flag like build/update -- confirm.

    bool clipTips; // Clip short tips (length < 2k). Default: false.
    bool deleteIsolated; // Remove short isolated unitigs (length < 2k). Default: false.
    bool useMercyKmers; // Keep low coverage k-mers (cov=1) connecting tips. Default: false.

    bool outputGFA; // Write the graph to a GFA file. Default: true.
    bool outputFASTA; // Write the graph to a FASTA file. Default: false.
    bool outputBFG; // Write the graph to a BFG/BFI file. Default: false.

    bool compressOutput; // Default: true. NOTE(review): presumably forwarded as compressed_output to write() -- confirm.
    bool inexact_search; // Default: false. NOTE(review): consumer not visible in this part of the file.

    bool writeIndexFile; // Default: true. NOTE(review): presumably forwarded as write_index_file to write() -- confirm.

    double ratio_kmers; // Default: 0.8. NOTE(review): consumer not visible in this part of the file.

    std::string prefixFilenameOut; // Prefix of the name of the file to which the graph is written.

    std::string filename_graph_in; // Graph file to read with CompactedDBG<U, G>::read. Default: empty (no input file).
    std::string filename_index_in; // Default: empty. NOTE(review): presumably the index file given to read() -- confirm.

    std::vector<std::string> filename_query_in; // Default: empty. NOTE(review): presumably query files for the CLI -- confirm.

    /** Set every scalar member to its default value; strings and vectors are left empty.
    * The initializers are listed in the order in which the members are declared: C++ always
    * initializes members in declaration order, whatever the order of the list, so keeping
    * both in sync changes no value and avoids -Wreorder diagnostics.
    */
    CDBG_Build_opt() :  verbose(false),
                        nb_threads(1),
                        min_count_km(1),
                        nb_bits_kmers_bf(14),
                        k(DEFAULT_K),
                        g(-1),
                        build(false),
                        update(false),
                        query(false),
                        clipTips(false),
                        deleteIsolated(false),
                        useMercyKmers(false),
                        outputGFA(true),
                        outputFASTA(false),
                        outputBFG(false),
                        compressOutput(true),
                        inexact_search(false),
                        writeIndexFile(true),
                        ratio_kmers(0.8) {}
};

/** @typedef const_UnitigMap
* @brief Constant flavour of UnitigMap: an alias of UnitigMap whose third (boolean) template argument
* is fixed to true. In usage, the main difference with a plain UnitigMap object shows when calling the
* method UnitigMap::getGraph(): through a const_UnitigMap, this method returns a pointer to a constant
* CompactedDBG, so the graph cannot be modified.
*/
template<typename Unitig_data_t = void, typename Graph_data_t = void>
using const_UnitigMap = UnitigMap<Unitig_data_t, Graph_data_t, true>;

/** @class CDBG_Data_t
* @brief If data are to be associated with the unitigs of the compacted de Bruijn graph, those data
* must be wrapped into a class that inherits from the abstract class CDBG_Data_t. Otherwise it will
* not compile. To associate data of type "MyUnitigData" to unitigs, class MyUnitigData must be declared
* as follows:
* \code{.cpp}
* class MyUnitigData : public CDBG_Data_t<MyUnitigData, MyGraphData> { ... };
* ...
* CompactedCDBG<MyUnitigData, MyGraphData> cdbg;
* \endcode
* An object of type MyUnitigData represents an instantiation of user data associated with one
* unitig of the graph.
* CDBG_Data_t has two template parameters: the type of unitig data ("MyUnitigData") and the type of
* graph data ("MyGraphData"). Indeed, if class MyUnitigData is going to be used in combination with
* class MyGraphData for a CompactedDBG, MyUnitigData must "know" the type MyGraphData for the parameters
* of its (mandatory) functions. If no graph data is used, you do not have to specify the template
* parameter MyGraphData or you can void it:
* \code{.cpp}
* class MyUnitigData : public CDBG_Data_t<MyUnitigData> { ... };
* class MyUnitigData : public CDBG_Data_t<MyUnitigData, void> { ... }; // Equivalent to previous notation
* ...
* CompactedCDBG<MyUnitigData> cdbg;
* \endcode
* Because CDBG_Data_t is an abstract class, all the methods from the base class (CDBG_Data_t) must be
* implemented in your wrapper (the derived class, aka MyUnitigData in this example). IMPORTANT: If you do
* not implement those methods in your class, default ones that have no effect will be applied. Do not
* forget to implement copy and move constructors/destructors as well as copy and move assignment operators.
* An example of using such a structure is shown in snippets/test.cpp.
*/
template<typename Unitig_data_t, typename Graph_data_t = void> // CRTP: Unitig_data_t is the class deriving from CDBG_Data_t
class CDBG_Data_t {

    public:

        /**
        * Reset the data attached to a unitig.
        * The object on which this function is called holds the data of the reference unitig of um_dest.
        * @param um_dest is a UnitigMap object whose reference sequence is the unitig for which the data
        * must be cleared.
        */
        void clear(const UnitigMap<Unitig_data_t, Graph_data_t>& um_dest) {

            // Default behavior: nothing to clear.
        }

        /**
        * Combine the data of two unitigs that are about to be concatenated.
        * Let A be the reference unitig of um_dest and B the reference unitig of um_src. Once this function
        * has returned, A and B are removed from the graph and a unitig C = AB is added to it. The object
        * on which this function is called holds the data of the new unitig C = AB.
        * If um_dest.strand = false, the reverse-complement of A is the sequence used in the concatenation;
        * likewise, if um_src.strand = false, the reverse-complement of B is used.
        * A and B are guaranteed to belong to the same graph. The data of each of them can be reached
        * through UnitigMap::getData.
        * @param um_dest is a UnitigMap object whose reference sequence is the unitig to which another
        * unitig is going to be appended.
        * @param um_src is a UnitigMap object whose reference sequence is the unitig appended at the end
        * of the unitig given by um_dest.
        */
        void concat(const UnitigMap<Unitig_data_t, Graph_data_t>& um_dest, const UnitigMap<Unitig_data_t, Graph_data_t>& um_src) {

            // Default behavior: the concatenated unitig gets no data.
        }

        /**
        * Merge the data of a sub-unitig B into the data of a sub-unitig A.
        * The object on which this function is called holds the data of the reference unitig of um_dest.
        * A and B are NOT guaranteed to belong to the same graph. The data of each of them can be reached
        * through UnitigMap::getData.
        * @param um_dest is a UnitigMap object whose mapped sequence is the sub-unitig A, the destination
        * of the merge.
        * @param um_src is a const_UnitigMap object whose mapped sequence is the sub-unitig B, the data of
        * which must be merged with the data of sub-unitig A (given by parameter um_dest).
        */
        void merge(const UnitigMap<Unitig_data_t, Graph_data_t>& um_dest, const const_UnitigMap<Unitig_data_t, Graph_data_t>& um_src) {

            // Default behavior: nothing is merged.
        }

        /**
        * Extract the data matching a sub-unitig B of a unitig A.
        * B is described by the mapping um_src to A: B = A[um_src.dist, um_src.dist + um_src.len + k - 1],
        * or B = rev(A[um_src.dist, um_src.dist + um_src.len + k - 1]) if um_src.strand == false
        * (B is then reverse-complemented). The object on which this function is called holds the data
        * to associate with sub-unitig B.
        * IF last_extraction == true, then after this function returns, unitig A is deleted from the graph
        * and B is inserted in the graph (along with their data).
        * @param um_src is a UnitigMap object giving the mapping to the unitig A from which the new unitig B
        * is extracted.
        * @param last_extraction is a boolean telling whether this is the last call to this function on the
        * reference unitig A of um_src. When true, A is removed from the graph right after this function
        * returns, and all the unitigs B extracted from A are inserted in the graph along with their data.
        */
        void extract(const UnitigMap<Unitig_data_t, Graph_data_t>& um_src, bool last_extraction) {

            // Default behavior: nothing is extracted.
        }

        /**
        * Turn the data into a GFA-formatted string. This function is used when the graph is written to
        * disk in GFA format. A non-empty returned string is appended as an optional field to the Segment
        * line of the unitig owning this data. Note that it is your responsibility to put GFA-compatible
        * tags matching your data in the string.
        * @param um_src is a const_UnitigMap object representing the (reference) unitig with which the data
        * to serialize is associated.
        * @return a string which is the serialization of the data (empty for this default implementation).
        */
        string serialize(const const_UnitigMap<Unitig_data_t, Graph_data_t>& um_src) const {

            return {};
        }
};

/** @class CompactedDBG
* @brief Represent a Compacted de Bruijn graph. The two template parameters of this class corresponds to the type of data
* to associate with the unitigs of the graph (unitig data) and the type of data to associate with the graph (graph data).
* If no template parameters are specified or if the types are void, no data are associated with the unitigs nor the graph and
* no memory will be allocated for such data.
* \code{.cpp}
* CompactedDBG<> cdbg_1; // No unitig data, no graph data
* CompactedDBG<void> cdbg_2; // Equivalent to previous notation
* CompactedDBG<void, void> cdbg_3; // Equivalent to previous notation
* CompactedDBG<MyUnitigData> cdbg_4; // An object of type MyUnitigData will be associated with each unitig, no graph data
* CompactedDBG<MyUnitigData, void> cdbg_5; // Equivalent to previous notation
* CompactedDBG<void, MyGraphData> cdbg_6; // No unitig data, an object of type MyGraphData will be associated with the graph
* CompactedDBG<MyUnitigData, MyGraphData> cdbg_7; // Unitig data of type MyUnitigData for each unitig, graph data of type MyGraphData
* \endcode
* If data are to be associated with the unitigs, these data must be wrapped into a class that inherits from the abstract class
* CDBG_Data_t, such as in:
* \code{.cpp}
* class MyUnitigData : public CDBG_Data_t<MyUnitigData> { ... };
* CompactedDBG<MyUnitigData> cdbg;
* \endcode
* Because CDBG_Data_t is an abstract class, all the methods from the base class (CDBG_Data_t) must be
* implemented in your wrapper (the derived class, aka MyUnitigData in this example). IMPORTANT: If you do
* not implement those methods in your class, default ones that have no effect will be applied.
*/
template<typename Unitig_data_t = void, typename Graph_data_t = void>
class CompactedDBG {

    static_assert(is_void<Unitig_data_t>::value || is_base_of<CDBG_Data_t<Unitig_data_t, Graph_data_t>, Unitig_data_t>::value,
                  "Type of data associated with vertices of class CompactedDBG must be void (no data) or a class extending class CDBG_Data_t");

    typedef Unitig_data_t U;
    typedef Graph_data_t G;

    public:

        template<typename U, typename G, bool is_const> friend class UnitigMap;
        template<typename U, typename G, bool is_const> friend class unitigIterator;
        template<typename U, typename G, bool is_const> friend class neighborIterator;

        template<typename X, typename Y> friend class CompactedDBG;

        typedef unitigIterator<U, G, false> iterator; /**< An iterator for the unitigs of the graph. No specific order is assumed. */
        typedef unitigIterator<U, G, true> const_iterator; /**< A constant iterator for the unitigs of the graph. No specific order is assumed. */

        /** Constructor (set up an empty compacted dBG).
        * @param kmer_length is the length k of k-mers used in the graph (each unitig is of length at least k).
        * @param minimizer_length is the length g of minimizers (g < k) used in the graph.
        */
        CompactedDBG(const int kmer_length = DEFAULT_K, const int minimizer_length = -1);

        /** Copy constructor (copy a compacted de Bruijn graph).
        * This function is expensive in terms of time and memory as the content of a compacted
        * de Bruijn graph is copied.  After the call to this function, the same graph exists twice in memory.
        * @param o is a constant reference to the compacted de Bruijn graph to copy.
        */
        CompactedDBG(const CompactedDBG<U, G>& o); // Copy constructor

        /** Move constructor (move a compacted de Bruijn graph).
        * The content of o is moved ("transferred") to a new compacted de Bruijn graph.
        * The compacted de Bruijn graph referenced by o will be empty after the call to this constructor.
        * @param o is an rvalue reference to the compacted de Bruijn graph to move.
        */
        CompactedDBG(CompactedDBG<U, G>&& o); // Move constructor

        /** Destructor.
        */
        virtual ~CompactedDBG();

        /** Copy assignment operator (copy a compacted de Bruijn graph).
        * This function is expensive in terms of time and memory as the content of a compacted
        * de Bruijn graph is copied.  After the call to this function, the same graph exists twice in memory.
        * @param o is a constant reference to the compacted de Bruijn graph to copy.
        * @return a reference to the compacted de Bruijn which is the copy.
        */
        CompactedDBG<U, G>& operator=(const CompactedDBG<U, G>& o);

        /** Move assignment operator (move a compacted de Bruijn graph).
        * The content of o is moved ("transferred") to the current compacted de Bruijn graph (this).
        * The compacted de Bruijn graph referenced by o will be empty after the call to this operator.
        * @param o is an rvalue reference to the compacted de Bruijn graph to move.
        * @return a reference to the compacted de Bruijn which has (and owns) the content of o.
        */
        CompactedDBG<U, G>& operator=(CompactedDBG<U, G>&& o);

        /** Addition assignment operator (merge a compacted de Bruijn graph).
        * After merging, all unitigs of o have been added to and compacted with the current compacted de Bruijn graph (this).
        * If the unitigs of o had data of type "MyUnitigData" associated, they have been added to the current compacted
        * de Bruijn graph using the functions of the class MyUnitigData which are in base class CDBG_Data_t<MyUnitigData>.
        * This function is similar to CompactedDBG::merge except that it uses only one thread while CompactedDBG::merge can
        * work with multiple threads (number of threads provided as a parameter).
        * Note that if multiple compacted de Bruijn graphs have to be merged, it is more efficient to call CompactedDBG::merge
        * with a vector of CompactedDBG as input.
        * @param o is a constant reference to the compacted de Bruijn graph to merge.
        * @return a reference to the current compacted de Bruijn after merging.
        */
        CompactedDBG<U, G>& operator+=(const CompactedDBG<U, G>& o);

        /** Equality operator.
        * @return a boolean indicating if two compacted de Bruijn graphs have the same unitigs (does not compare the data
        * associated with the unitigs).
        */
        bool operator==(const CompactedDBG<U, G>& o) const;

        /** Inequality operator.
        * @return a boolean indicating if two compacted de Bruijn graphs have different unitigs (does not compare the data
        * associated with the unitigs).
        */
        inline bool operator!=(const CompactedDBG<U, G>& o) const;

        /** Clear the graph: empty the graph and reset its parameters.
        */
        void clear();

        /** Build the Compacted de Bruijn graph.
        * @param opt is a structure from which the members are parameters of this function. See CDBG_Build_opt.
        * @return boolean indicating if the graph has been built successfully.
        */
        bool build(CDBG_Build_opt& opt);

        /** Simplify the Compacted de Bruijn graph: clip short (< 2k length) tips and/or delete short (< 2k length) isolated unitigs.
        * @param delete_short_isolated_unitigs is a boolean indicating short isolated unitigs must be removed.
        * @param clip_short_tips is a boolean indicating short tips must be clipped.
        * @param verbose is a boolean indicating if information messages must be printed during the function execution.
        * @return boolean indicating if the graph has been simplified successfully.
        */
        bool simplify(const bool delete_short_isolated_unitigs = true, const bool clip_short_tips = true, const bool verbose = false);

        /** Write the Compacted de Bruijn graph to disk (GFA1, FASTA and/or BFG/BFI format).
        * @param output_fn is a string containing the name of the file in which the graph will be written.
        * @param nb_threads is a number indicating how many threads can be used to write the graph to disk.
        * @param GFA_output indicates if the graph will be output in GFA format.
        * @param FASTA_output indicates if the graph will be output in FASTA format.
        * @param BFG_output indicates if the graph will be output in BFG/BFI format.
        * @param write_index_file indicates if an index file is written to disk. Index files enable faster graph loading.
        * This parameter is discarded if BFG format output is selected (index output is required then).
        * @param compressed_output indicates if the output file is compressed.
        * @param verbose is a boolean indicating if information messages must be printed during the function execution.
        * @return boolean indicating if the graph has been written successfully.
        */
        bool write( const string& output_fn, const size_t nb_threads = 1, const bool GFA_output = true, const bool FASTA_output = false,
                    const bool BFG_output = false, const bool write_index_file = true, const bool compressed_output = false,
                    const bool verbose = false) const;

        /** Load a Compacted de Bruijn graph from disk (GFA1 or FASTA format). This function detects if an index file (BFI format)
        * exists (same prefix as graph) for the input graph and will use it to load the graph. Otherwise, loading will be slower
        * than read() with the index graph file.
        * If the input GFA file has not been built by Bifrost or if the input is FASTA format, it is your responsibility to make sure
        * that the graph is correctly compacted and to set correctly the parameters of the graph (such as the k-mer length) before the
        * call to this function.
        * @param input_graph_fn is a string containing the name of the graph file to read.
        * @param nb_threads is a number indicating how many threads can be used to read the graph from disk.
        * @param verbose is a boolean indicating if information messages must be printed during the function execution.
        * @return boolean indicating if the graph has been read successfully.
        */
        bool read(const string& input_graph_fn, const size_t nb_threads = 1, const bool verbose = false);

        /** Read a Compacted de Bruijn graph from disk (GFA1, FASTA or BFG format) using an index file (BFI format).
        * Index files make the loading much faster than the other function read() without meta graph file.
        * If the input GFA file has not been built by Bifrost or if the input is FASTA format, it is your responsibility to make sure
        * that the graph is correctly compacted and to set correctly the parameters of the graph (k-mer length and g-mer) before the
        * call to this function.
        * @param input_graph_fn is a string containing the name of the graph file to read.
        * @param input_index_fn is a string containing the name of the index file to read.
        * @param nb_threads is a number indicating how many threads can be used to read the graph from disk.
        * @param verbose is a boolean indicating if information messages must be printed during the function execution.
        * @return boolean indicating if the graph has been read successfully.
        */
        bool read(const string& input_graph_fn, const string& input_index_fn, const size_t nb_threads = 1, const bool verbose = false);

        /** Find the unitig containing the queried k-mer in the Compacted de Bruijn graph.
        * @param km is the queried k-mer (see Kmer class). It does not need to be a canonical k-mer.
        * @param extremities_only is a boolean indicating if the k-mer must be searched only in the unitig heads and tails (extremities_only = true).
        * By default, the k-mer is searched everywhere (extremities_only = false) but it is slightly slower than looking only in the unitig heads and tails.
        * @return UnitigMap<U, G> object containing the k-mer mapping information to the unitig containing the queried k-mer (if present).
        * If the queried k-mer is not found, UnitigMap::isEmpty = true (see UnitigMap class).
        */
        UnitigMap<U, G> find(const Kmer& km, const bool extremities_only = false);

        /** Find the unitig containing the queried k-mer in the Compacted de Bruijn graph.
        * @param km is the queried k-mer (see Kmer class). It does not need to be a canonical k-mer.
        * @param extremities_only is a boolean indicating if the k-mer must be searched only in the unitig heads and tails (extremities_only = true).
        * By default, the k-mer is searched everywhere (extremities_only = false) but it is slightly slower than looking only in the unitig heads and tails.
        * @return const_UnitigMap<U, G> object containing the k-mer mapping information to the unitig having the queried k-mer (if present).
        * If the k-mer is not found, const_UnitigMap::isEmpty = true (see UnitigMap class).
        */
        const_UnitigMap<U, G> find(const Kmer& km, const bool extremities_only = false) const;

        /** Find the unitig containing the k-mer starting at a given position in a query sequence and extends the mapping (if the k-mer is found, the
        * function extends the mapping from the k-mer as long as the query sequence and the unitig matches).
        * @param s is a pointer to an array of character containing the sequence to query.
        * @param pos is the position of the first k-mer to find in the sequence to query.
        * @param len is the length of s.
        * @return UnitigMap<U, G> object containing the mapping information to the unitig having the queried k-mer (if present).
        * If the k-mer is found, the function extends the mapping from the k-mer as long as the query sequence and the unitig matches (um.len >= 1).
        */
        UnitigMap<U, G> findUnitig(const char* s, const size_t pos, const size_t len);

        /** Find the unitig containing the k-mer starting at a given position in a query sequence and extends the mapping (if the k-mer is found, the
        * function extends the mapping from the k-mer as long as the query sequence and the unitig matches).
        * @param s is a pointer to an array of character containing the sequence to query.
        * @param pos is the position of the first k-mer to find in the sequence to query.
        * @param len is the length of s.
        * @return const_UnitigMap<U, G> object containing the mapping information to the unitig having the queried k-mer (if present).
        * If the k-mer is found, the function extends the mapping from the k-mer as long as the query sequence and the unitig matches (um.len >= 1).
        */
        const_UnitigMap<U, G> findUnitig(const char* s, const size_t pos, const size_t len) const;

        /** Performs exact and/or inexact search of the k-mers of a sequence query in the Compacted de Bruijn graph.
        * @param s is a string representing the sequence to be searched (the query).
        * @param exact is a boolean indicating if the exact k-mers of string s must be searched.
        * @param insertion is a boolean indicating if the inexact k-mers of string s, with one insertion, must be searched.
        * @param deletion is a boolean indicating if the inexact k-mers of string s, with one deletion, must be searched.
        * @param substitution is a boolean indicating if the inexact k-mers of string s, with one substitution, must be searched.
        * @param or_exclusive_match is a boolean indicating to NOT search for the inexact k-mers at any given position in s
        * if the exact corresponding k-mer at that position is found in the graph. This option might lead to a substantial running time decrease.
        * @return a vector of pair<size_t, UnitigMap<U, G>> objects. Each such pair has two elements: the position of the k-mer match in sequence s
        * and the corresponding k-mer match in the graph. Note that no information is given on whether the match is exact or inexact, nor on what edit
        * operation makes the match to be inexact or at what position the edit operation takes place.
        */
        vector<pair<size_t, UnitigMap<U, G>>> searchSequence(   const string& s, const bool exact, const bool insertion, const bool deletion,
                                                                const bool substitution, const bool or_exclusive_match = false);

        /** Perform an exact and/or inexact search of the k-mers of a sequence query in the Compacted de Bruijn graph.
        * This is the const overload: the graph is not modified and the matches are returned as const_UnitigMap objects.
        * @param s is a string representing the sequence to be searched (the query).
        * @param exact is a boolean indicating if the exact k-mers of string s must be searched.
        * @param insertion is a boolean indicating if the inexact k-mers of string s, with one insertion, must be searched.
        * @param deletion is a boolean indicating if the inexact k-mers of string s, with one deletion, must be searched.
        * @param substitution is a boolean indicating if the inexact k-mers of string s, with one substitution, must be searched.
        * @param or_exclusive_match is a boolean indicating to NOT search for the inexact k-mers at any given position in s
        * if the exact corresponding k-mer at that position is found in the graph. This option might lead to a substantial running time decrease.
        * It is false by default.
        * @return a vector of pair<size_t, const_UnitigMap<U, G>> objects. Each such pair has two elements: the position of the k-mer match in sequence s
        * and the corresponding k-mer match in the graph. Note that no information is given on whether the match is exact or inexact, nor on which edit
        * operation makes the match inexact or at which position the edit operation takes place.
        */
        vector<pair<size_t, const_UnitigMap<U, G>>> searchSequence( const string& s, const bool exact, const bool insertion, const bool deletion,
                                                                    const bool substitution, const bool or_exclusive_match = false) const;

        /** Add a sequence to the Compacted de Bruijn graph. Non-{A,C,G,T} characters such as Ns are discarded.
        * The function automatically breaks the sequence into unitig(s). Those unitigs can be stored as the reverse-complement
        * of the input sequence.
        * @param seq is a string containing the sequence to insert.
        * @param verbose is a boolean indicating if information messages must be printed during the function execution.
        * @return a boolean indicating if the sequence was successfully inserted in the graph.
        */
        bool add(const string& seq, const bool verbose = false);

        /** Remove a unitig from the Compacted de Bruijn graph.
        * @param um is a UnitigMap object containing the information of the unitig to remove from the graph.
        * @param verbose is a boolean indicating if information messages must be printed during the execution of the function.
        * @return a boolean indicating if the unitig was successfully removed from the graph.
        */
        bool remove(const const_UnitigMap<U, G>& um, const bool verbose = false);

        /** Merge a compacted de Bruijn graph.
        * After merging, all unitigs of o have been added to and compacted with the current compacted de Bruijn graph (this).
        * If the unitigs of o had data of type "MyUnitigData" associated, they have been added to the current compacted
        * de Bruijn graph using the functions of the class MyUnitigData which are also present in its base class
        * CDBG_Data_t<MyUnitigData>.
        * Note that if multiple compacted de Bruijn graphs have to be merged, it is more efficient to call CompactedDBG::merge
        * with a vector of CompactedDBG as input.
        * @param o is a constant reference to the compacted de Bruijn graph to merge.
        * @param nb_threads is an integer indicating how many threads can be used during the merging.
        * @param verbose is a boolean indicating if information messages must be printed during the execution of the function.
        * @return a boolean indicating if the graph has been successfully merged.
        */
        bool merge(const CompactedDBG& o, const size_t nb_threads = 1, const bool verbose = false);

        /** Merge multiple compacted de Bruijn graphs.
        * After merging, all unitigs of the compacted de Bruijn graphs have been added to and compacted
        * with the current compacted de Bruijn graph (this). If the unitigs had data of type "MyUnitigData"
        * associated, they have been added to the current compacted de Bruijn graph using the functions of the
        * class MyUnitigData which are also present in its base class CDBG_Data_t<MyUnitigData>.
        * @param v is a constant reference to a vector of compacted de Bruijn graphs to merge.
        * @param nb_threads is an integer indicating how many threads can be used during the merging.
        * @param verbose is a boolean indicating if information messages must be printed during the execution of the function.
        * @return a boolean indicating if the graphs have been successfully merged.
        */
        bool merge(const vector<CompactedDBG>& v, const size_t nb_threads = 1, const bool verbose = false);

        /** Create an iterator to the first unitig of the Compacted de Bruijn graph (unitigs are NOT sorted lexicographically).
        * @return an iterator to the first unitig of the graph.
        */
        iterator begin();

        /** Create a constant iterator to the first unitig of the Compacted de Bruijn graph (unitigs are NOT sorted lexicographically).
        * @return a constant iterator to the first unitig of the graph.
        */
        const_iterator begin() const;

        /** Create an iterator to the "past-the-last" unitig of the Compacted de Bruijn graph (unitigs are NOT sorted lexicographically).
        * @return an iterator to the "past-the-last" unitig of the graph.
        */
        iterator end();

        /** Create a constant iterator to the "past-the-last" unitig of the Compacted de Bruijn graph (unitigs are NOT sorted lexicographically).
        * @return a constant iterator to the "past-the-last" unitig of the graph.
        */
        const_iterator end() const;

        /** Return the sum of the lengths of the unitigs.
        * @return An integer which corresponds to the sum of the lengths of the unitigs.
        */
        size_t length() const;

        /** Return the number of k-mers in the graph.
        * @return An integer which corresponds to the number of k-mers in the graph.
        */
        size_t nbKmers() const;

        /** Tell whether the graph is invalid (wrong input parameters/files, error occurring during a method, etc.).
        * @return the current value of the invalid flag of the graph: true if the graph is invalid, false otherwise.
        */
        inline bool isInvalid() const {

            return invalid;
        }

        /** Get the k-mer length used by the graph.
        * @return the length of the k-mers of the graph (value of the member k_).
        */
        inline int getK() const {

            return k_;
        }

        /** Get the minimizer length used by the graph.
        * @return the length of the minimizers of the graph (value of the member g_).
        */
        inline int getG() const {

            return g_;
        }

        /** Return the number of unitigs in the graph.
        * @return Number of unitigs in the graph.
        */
        inline size_t size() const { return v_unitigs.size() + km_unitigs.size() + h_kmers_ccov.size(); }

        /** Access the data associated with the graph.
        * The call is forwarded to the getData() function of the data wrapper member.
        * @return a pointer to the graph data. The pointer is nullptr if the type of graph data is void.
        */
        inline G* getData() {

            return data.getData();
        }

        /** Access the data associated with the graph, read-only.
        * The call is forwarded to the getData() function of the data wrapper member.
        * @return a constant pointer to the graph data. The pointer is nullptr if the type of graph data is void.
        */
        inline const G* getData() const {

            return data.getData();
        }

        /** Query the graph with the content of one or more files (const function: the graph is not modified).
        * NOTE(review): the definition is not part of this header section; the parameter descriptions below are
        * inferred from the parameter names and types only and must be confirmed against the implementation.
        * @param query_filenames is a vector of strings, presumably the names of the files containing the queries.
        * @param out_filename_prefix is a string, presumably the prefix of the name of the output file.
        * @param ratio_kmers is a double, presumably the ratio of k-mers of a query that must be found in the graph.
        * @param inexact_search is a boolean, presumably enabling the inexact search of the k-mers.
        * @param nb_threads is an integer, presumably the number of threads that can be used.
        * @param verbose is declared as a size_t (not a bool) although its default value is false, i.e. 0.
        * @return a boolean, presumably indicating if the search completed successfully.
        */
        bool search(const vector<string>& query_filenames, const string& out_filename_prefix,
                    const double ratio_kmers, const bool inexact_search, const size_t nb_threads,
                    const size_t verbose = false) const;

        // Binary output of the graph. Two overloads: one takes a filename, the other an output stream.
        // Both are const and return a boolean (presumably a success flag -- confirm against the implementation).
        bool writeBinary(const string& fn, const size_t nb_threads = 1) const;
        bool writeBinary(ostream& out, const size_t nb_threads = 1) const;

        // Binary input of the graph, from a filename or from an input stream.
        // The stream overloads additionally take either a vector of Minimizer or a pointer to a boophf_t.
        // NOTE(review): presumably these correspond to the two values of the static_m flag of the
        // filename overload -- confirm against the implementation.
        bool readBinary(const string& fn, bool static_m=false, uint32_t threads=1);
        bool readBinary(istream& in, std::vector<Minimizer>& minz, uint32_t threads=1);
        bool readBinary(istream& in, boophf_t* mphf, uint32_t threads=1);

        // Minimizer-level helpers: minz is passed by non-const reference, so it can be modified by the callee.
        // NOTE(review): what writeMinimizers() returns in its size_t is not visible here -- confirm.
        bool readMinimizers(istream& in, std::vector<Minimizer>& minz, uint32_t threads=1);
        size_t writeMinimizers(ostream& out);
        void clearAndGetMinimizers(std::vector<Minimizer>& minz);


        // NOTE(review): presumably converts the minimizer index to a static representation, with gamma
        // being a parameter of that representation -- confirm against the implementation.
        void to_static(uint32_t threads=1, float gamma=2.0);

    protected:

        // Takes another graph o as read-only input and returns a boolean.
        // NOTE(review): presumably a step of the merging of o into this graph -- confirm against the implementation.
        bool annotateSplitUnitigs(const CompactedDBG<U, G>& o, const size_t nb_threads = 1, const bool verbose = false);

        // Both functions return a pair of size_t; only getSplitInfoAllUnitigs() is const.
        // NOTE(review): the meaning of the two elements of the pair is not visible here -- confirm.
        pair<size_t, size_t> splitAllUnitigs();
        pair<size_t, size_t> getSplitInfoAllUnitigs() const;

        /** Forward to the joinUnitigs_ overload selected at compile time according to
        * whether the type U of the unitig data is void.
        * @param v_joins is a pointer to a vector of Kmer, forwarded unchanged to joinUnitigs_ (nullptr by default).
        * @param nb_threads is an integer forwarded unchanged to joinUnitigs_ (1 by default).
        * @return the value returned by joinUnitigs_.
        */
        inline size_t joinUnitigs(vector<Kmer>* v_joins = nullptr, const size_t nb_threads = 1) {

            // Compile-time flag used to pick the matching joinUnitigs_ overload.
            constexpr bool unitig_data_is_void = is_void<U>::value;

            return joinUnitigs_<unitig_data_is_void>(v_joins, nb_threads);
        }

        // Two overloads taking the other graph o either by const reference or by rvalue reference.
        // NOTE(review): presumably they merge the data associated with the unitigs of o into this graph,
        // the rvalue overload being allowed to steal from o -- confirm against the implementation.
        bool mergeData(const CompactedDBG<U, G>& o, const size_t nb_threads = 1, const bool verbose = false);
        bool mergeData(CompactedDBG<U, G>&& o, const size_t nb_threads = 1, const bool verbose = false);

    private:

        // Binary output is split in two parts, the graph and the index, each with a stream and a filename overload.
        bool writeBinaryGraph(ostream& out, const size_t nb_threads = 1) const;
        bool writeBinaryGraph(const string& fn, const size_t nb_threads = 1) const;

        // The index writers additionally take a 64-bit checksum.
        // NOTE(review): presumably the value returned by checksum() -- confirm.
        bool writeBinaryIndex(ostream& out, const uint64_t checksum, const size_t nb_threads = 1) const;
        bool writeBinaryIndex(const string& fn, const uint64_t checksum, const size_t nb_threads = 1) const;

        // NOTE(review): the returned pair presumably holds a checksum (uint64_t) and a success flag (bool) -- confirm.
        pair<uint64_t, bool> readBinaryGraph(istream& in);
        pair<uint64_t, bool> readBinaryGraph(const string& fn);

        // minz is passed by non-const reference, so it can be modified by the callee.
        bool readBinaryMinimizers(istream& in, const uint64_t checksum, std::vector<Minimizer>& minz, uint32_t threads=1);

        // Index readers: same three flavors (vector of Minimizer, boophf_t pointer, filename + static_m flag)
        // as the public readBinary() overloads, with an additional checksum parameter.
        bool readBinaryIndex(istream& in, const uint64_t checksum, std::vector<Minimizer>& minz, uint32_t threads=1);
        bool readBinaryIndex(istream& in, const uint64_t checksum, boophf_t* mphf, uint32_t threads=1);
        bool readBinaryIndex(const string& fn, const uint64_t checksum, bool static_m=false, uint32_t threads=1);

        // Const functions with only output (non-const reference) parameters besides the source (filename or stream).
        // NOTE(review): presumably they read only the header of the index and report its content -- confirm.
        bool readBinaryIndexHead(const string& fn, size_t& file_format_version, size_t& v_unitigs_sz, size_t& km_unitigs_sz,
                                size_t& h_kmers_ccov_sz, size_t& hmap_min_unitigs_sz, uint64_t& read_checksum) const;

        bool readBinaryIndexHead(istream& in, size_t& file_format_version, size_t& v_unitigs_sz, size_t& km_unitigs_sz,
                                size_t& h_kmers_ccov_sz, size_t& hmap_min_unitigs_sz, uint64_t& read_checksum) const;

        // Computes a 64-bit checksum without modifying the graph (const).
        // NOTE(review): what exactly is hashed is not visible here -- confirm.
        uint64_t checksum() const;

        // Takes a graph without unitig data nor graph data (CompactedDBG<void, void>) by rvalue reference
        // and returns a reference to a CompactedDBG<U, G>.
        // NOTE(review): presumably moves the content of o into this graph and returns *this -- confirm.
        CompactedDBG<U, G>& toDataGraph(CompactedDBG<void, void>&& o, const size_t nb_threads = 1);

        // filter() returns a boolean together with a BlockedBloomFilter and a Roaring bitmap; construct() takes
        // a BlockedBloomFilter and a Roaring bitmap by non-const reference.
        // NOTE(review): presumably the two successive steps of the graph construction, the output of filter()
        // being the input of construct() -- confirm against the implementation.
        pair<bool, pair<BlockedBloomFilter, Roaring>> filter(const CDBG_Build_opt& opt, const size_t nb_unique_kmers, const size_t nb_non_unique_kmers);
        bool construct(const CDBG_Build_opt& opt, BlockedBloomFilter& bf, Roaring& r, const size_t nb_unique_minimizers, const size_t nb_non_unique_minimizers);

        // Takes a LockGraph by non-const reference; map_read is true by default.
        // NOTE(review): presumably inserts the unitig sequence seq in the graph under the locks of lck_g -- confirm.
        void addUnitigSequence(const Kmer km, const string& seq, const size_t pos_match_km, const size_t len_match_km, LockGraph& lck_g, const bool map_read = true);
        //void addUnitigSequence(const string& seq);

        // Two overloads, without and with a LockGraph. s, isIsolated and l_ignored_km_tip are output
        // (non-const reference) parameters; the Bloom filter bf is read-only.
        // NOTE(review): the meaning of the returned size_t is not visible here -- confirm.
        size_t findUnitigSequenceBBF(const BlockedBloomFilter& bf, const Kmer km, string& s, bool& isIsolated, vector<Kmer>& l_ignored_km_tip);
        size_t findUnitigSequenceBBF(const BlockedBloomFilter& bf, const Kmer km, string& s, bool& isIsolated, vector<Kmer>& l_ignored_km_tip, LockGraph& lck_g);

        // Backward (bw) and forward (fw) counterparts with parallel signatures; both are const.
        // front/end, c, has_no_neighbor and l_ignored_km_tip are output (non-const reference) parameters.
        // NOTE(review): presumably one extension step from k-mer km using the Bloom filter bf -- confirm.
        bool bwStepBBF(const BlockedBloomFilter& bf, const Kmer km, Kmer& front, char& c, bool& has_no_neighbor, vector<Kmer>& l_ignored_km_tip, const bool check_fp_cand = true) const;
        bool fwStepBBF(const BlockedBloomFilter& bf, const Kmer km, Kmer& end, char& c, bool& has_no_neighbor, vector<Kmer>& l_ignored_km_tip, const bool check_fp_cand = true) const;

        /** Look up, in the minimizer index hmap_min_unitigs, the representative of the minimizer located at
        * the current position of a minimizer iterator.
        * @param it_min_h is a minimizer iterator; its current position is read with getPosition() and its
        * members s and p are read directly.
        * @return 0 if the representative of the minimizer is present in hmap_min_unitigs, otherwise the
        * difference between the current position of the iterator and it_min_h.p.
        */
        inline size_t find(const preAllocMinHashIterator<RepHash>& it_min_h) const {

            const int pos = it_min_h.getPosition();
            const bool minz_is_indexed = (hmap_min_unitigs.find(Minimizer(it_min_h.s + pos).rep()) != hmap_min_unitigs.end());

            if (minz_is_indexed) return 0;

            return pos - it_min_h.p;
        }

        // Non-const and const overloads with the same parameters: the former returns a UnitigMap, the latter
        // a const_UnitigMap. extremities_only is false by default.
        // NOTE(review): presumably they search the k-mer starting at position pos_km of s -- confirm.
        UnitigMap<U, G> find(const char* s, const size_t pos_km, const minHashIterator<RepHash>& it_min, const bool extremities_only = false);
        const_UnitigMap<U, G> find(const char* s, const size_t pos_km, const minHashIterator<RepHash>& it_min, const bool extremities_only = false) const;

        // Overload taking the k-mer itself and a preAllocMinHashIterator; non-const only.
        UnitigMap<U, G> find(const Kmer& km, const preAllocMinHashIterator<RepHash>& it_min_h);

        //vector<const_UnitigMap<U, G>> find(const Minimizer& minz) const;

        // Const overloads returning const_UnitigMap objects. Only findSuccessors takes a limit (4 by default).
        vector<const_UnitigMap<U, G>> findPredecessors(const Kmer& km, const bool extremities_only = false) const;
        vector<const_UnitigMap<U, G>> findSuccessors(const Kmer& km, const size_t limit = 4, const bool extremities_only = false) const;

        // Non-const counterparts of the two functions above, returning UnitigMap objects.
        vector<UnitigMap<U, G>> findPredecessors(const Kmer& km, const bool extremities_only = false);
        vector<UnitigMap<U, G>> findSuccessors(const Kmer& km, const size_t limit = 4, const bool extremities_only = false);

        // Overloads taking a k-mer plus a string s and a position pos, without and with a preAllocMinHashIterator.
        // NOTE(review): how km relates to s and pos is not visible here -- confirm against the implementation.
        UnitigMap<U, G> findUnitig(const Kmer& km, const char* s, const size_t pos);
        UnitigMap<U, G> findUnitig(const Kmer& km, const char* s, const size_t pos, const preAllocMinHashIterator<RepHash>& it_min_h);

        // Non-const and const overloads taking a string s, a position pos, a length len and a minHashIterator.
        UnitigMap<U, G> findUnitig(const char* s, const size_t pos, const size_t len, const minHashIterator<RepHash>& it_min);
        const_UnitigMap<U, G> findUnitig(const char* s, const size_t pos, const size_t len, const minHashIterator<RepHash>& it_min) const;

        // Three overloads taking the unitig sequence and its identifier; the third one also takes two SpinLock
        // objects by non-const reference. Note that is_short_r is declared as a size_t.
        // NOTE(review): the meaning of the returned boolean and of id_unitig_r/is_short_r is not visible here -- confirm.
        bool addUnitig(const string& str_unitig, const size_t id_unitig);
        bool addUnitig(const string& str_unitig, const size_t id_unitig, const size_t id_unitig_r, const size_t is_short_r);
        bool addUnitig(const string& str_unitig, const size_t id_unitig, SpinLock& lck_unitig, SpinLock& lck_kmer/*, const bool enable_abundant = true*/);
        // NOTE(review): presumably swaps the unitigs of identifiers id_a and id_b, isShort selecting the container -- confirm.
        void swapUnitigs(const bool isShort, const size_t id_a, const size_t id_b);

        // Per-sequence helpers. annotateSplitUnitig has two overloads, without and with a LockGraph.
        // NOTE(review): presumably the per-unitig counterparts of merge() and annotateSplitUnitigs() -- confirm.
        bool mergeUnitig(const string& seq, const bool verbose = false);
        bool annotateSplitUnitig(const string& seq, const bool verbose = false);
        bool annotateSplitUnitig(const string& seq, LockGraph& lck_g, const bool verbose = false);

        /** Merge the data of two unitig mappings. This overload is only enabled when the template
        * parameter is_void is false.
        * The merge is delegated to the merge() function of the data object returned by a.getData().
        * @param a is the unitig mapping whose data object performs the merge.
        * @param b is the constant unitig mapping passed as second argument of the merge.
        */
        template<bool is_void>
        inline typename std::enable_if<!is_void, void>::type mergeData_(const UnitigMap<U, G>& a, const const_UnitigMap<U, G>& b){

            const auto data_a = a.getData();

            data_a->merge(a, b);
        }

        /** Counterpart of the data merging function which is only enabled when the template parameter
        * is_void is true. It does nothing.
        * @param a is unused.
        * @param b is unused.
        */
        template<bool is_void>
        inline typename std::enable_if<is_void, void>::type mergeData_(const UnitigMap<U, G>& a, const const_UnitigMap<U, G>& b) {

            // Intentionally empty: this overload merges nothing.
        }

        // Pair of overloads with identical parameters, made mutually exclusive by std::enable_if on the
        // boolean template parameter is_void: the first is enabled when is_void is false, the second when it is true.
        // NOTE(review): is_void is presumably instantiated with is_void<U>::value, as joinUnitigs() does for
        // joinUnitigs_ -- confirm at the call sites.
        template<bool is_void>
        typename std::enable_if<!is_void, void>::type deleteUnitig_(const bool isShort, const bool isAbundant,
                                                                    const size_t id_unitig, const bool delete_data = true);

        template<bool is_void>
        typename std::enable_if<is_void, void>::type deleteUnitig_( const bool isShort, const bool isAbundant,
                                                                    const size_t id_unitig, const bool delete_data = true);

        // Non-template overload taking the unitig sequence str instead of the delete_data flag.
        void deleteUnitig_(const bool isShort, const bool isAbundant, const size_t id_unitig, const string& str);

        // Same std::enable_if-based pair as above. All size_t parameters are passed by non-const reference,
        // so they can be updated by the callee; sp is read-only.
        // NOTE(review): the content of sp (vector of pairs of int) is not visible here -- confirm.
        template<bool is_void>
        typename std::enable_if<!is_void, bool>::type extractUnitig_(size_t& pos_v_unitigs, size_t& nxt_pos_insert_v_unitigs,
                                                                    size_t& v_unitigs_sz, size_t& v_kmers_sz, const vector<pair<int,int>>& sp);
        template<bool is_void>
        typename std::enable_if<is_void, bool>::type extractUnitig_(size_t& pos_v_unitigs, size_t& nxt_pos_insert_v_unitigs,
                                                                    size_t& v_unitigs_sz, size_t& v_kmers_sz, const vector<pair<int,int>>& sp);

        // NOTE(review): the meaning of the two elements of the returned pair is not visible here -- confirm.
        pair<size_t, size_t> extractAllUnitigs();

        // Implementations behind joinUnitigs(), which calls joinUnitigs_<is_void<U>::value>(v_joins, nb_threads).
        // std::enable_if makes the two overloads mutually exclusive: this one is enabled when is_void is false...
        template<bool is_void>
        typename std::enable_if<!is_void, size_t>::type joinUnitigs_(vector<Kmer>* v_joins = nullptr, const size_t nb_threads = 1);

        // ... and this one when is_void is true.
        template<bool is_void>
        typename std::enable_if<is_void, size_t>::type joinUnitigs_(vector<Kmer>* v_joins = nullptr, const size_t nb_threads = 1);

        // NOTE(review): the behavior of these two functions is not visible here. Note that setFullCoverage()
        // is declared const although its name suggests a modification -- confirm against the implementation.
        void moveToAbundant();
        void setFullCoverage(const size_t cov) const;

        // Two overloads differing only in the value type of the hash table joins (Kmer or char), which is
        // passed by non-const reference. Both are const with respect to the graph.
        void createJoinHT(vector<Kmer>* v_joins, KmerHashTable<Kmer>& joins, const size_t nb_threads) const;
        void createJoinHT(vector<Kmer>* v_joins, KmerHashTable<char>& joins, const size_t nb_threads) const;

        // checkJoin() is const and has b as output (non-const reference) parameter.
        // NOTE(review): presumably helpers of the unitig joining and of the graph simplification -- confirm.
        bool checkJoin(const Kmer& a, const const_UnitigMap<U, G>& cm_a, Kmer& b) const;
        void check_fp_tips(KmerHashTable<bool>& ignored_km_tips);
        size_t removeUnitigs(bool rmIsolated, bool clipTips, vector<Kmer>& v);

        // Note that joinTips() takes its filename by value.
        // NOTE(review): the meaning of the returned values is not visible here -- confirm.
        size_t joinTips(string filename_MBBF_uniq_kmers, const size_t nb_threads = 1, const bool verbose = false);
        vector<Kmer> extractMercyKmers(const BlockedBloomFilter& bf_uniq_km, const size_t nb_threads = 1, const bool verbose = false);

        // Text output of the graph to file fn, in GFA or FASTA format according to the function name.
        // Both are const; compressed_output is false by default. Only writeGFA takes a number of threads.
        bool writeGFA(const string& fn, const size_t nb_threads = 1, const bool compressed_output = false) const;
        bool writeFASTA(const string& fn, const bool compressed_output = false) const;

        // Input counterparts: they return nothing.
        // NOTE(review): presumably failures are reported through the invalid flag -- confirm.
        void makeGraphFromGFA(const string& fn, const size_t nb_threads = 1);
        void makeGraphFromFASTA(const string& fn, const size_t nb_threads = 1);

        // Take a graph file and an index file, plus k and g.
        // NOTE(review): the returned pair presumably holds a checksum (uint64_t) and a success flag (bool) -- confirm.
        pair<uint64_t, bool> readGraphFromIndexGFA(const string& graph_fn, const string& index_fn, const size_t k, const size_t g);
        pair<uint64_t, bool> readGraphFromIndexFASTA(const string& graph_fn, const string& index_fn, const size_t k, const size_t g);

        // Pair of const overloads made mutually exclusive by std::enable_if on the boolean template parameter
        // is_void: the first is enabled when is_void is false, the second when it is true.
        // graph and idmap are passed by non-const reference.
        template<bool is_void>
        typename std::enable_if<!is_void, void>::type writeGFA_sequence_(GFA_Parser& graph, KmerHashTable<size_t>& idmap) const;
        template<bool is_void>
        typename std::enable_if<is_void, void>::type writeGFA_sequence_(GFA_Parser& graph, KmerHashTable<size_t>& idmap) const;

        // mapRead() and unmapRead() have parallel signatures, each with an overload taking a LockGraph.
        // NOTE(review): presumably they respectively add and remove the coverage of the mapping um -- confirm.
        void mapRead(const const_UnitigMap<U, G>& um);
        void mapRead(const const_UnitigMap<U, G>& um, LockGraph& lck_g);

        void unmapRead(const const_UnitigMap<U, G>& um);
        void unmapRead(const const_UnitigMap<U, G>& um, LockGraph& lck_g);

        // minimizer_length is -1 by default.
        // NOTE(review): presumably -1 requests a default minimizer length derived from kmer_length -- confirm.
        void setKmerGmerLength(const int kmer_length, const int minimizer_length = -1);
        void print() const;

        // Private overloads of searchSequence(): compared to the public ones, they take an additional
        // ratio_kmers parameter and or_exclusive_match has no default value.
        vector<pair<size_t, UnitigMap<U, G>>> searchSequence(   const string& seq, const bool exact, const bool insertion, const bool deletion,
                                                                const bool substitution, const double ratio_kmers, const bool or_exclusive_match);

        vector<pair<size_t, const_UnitigMap<U, G>>> searchSequence( const string& seq, const bool exact, const bool insertion, const bool deletion,
                                                                    const bool substitution, const double ratio_kmers, const bool or_exclusive_match) const;

        // Length of the k-mers (returned by getK()) and length of the minimizers (returned by getG()).
        int k_;
        int g_;

        // Flag returned by isInvalid().
        bool invalid;

        // Compile-time constants. Note that min_abundance_lim and max_abundance_lim have the same value.
        // NOTE(review): their use is not visible in this section of the header -- confirm in the implementation.
        static const int tiny_vector_sz = 2;
        static const int min_abundance_lim = 15;
        static const int max_abundance_lim = 15;

        typedef KmerHashTable<CompressedCoverage_t<U>> h_kmers_ccov_t;

        // The three unitig containers: size() returns the sum of their sizes.
        // v_unitigs stores pointers to Unitig<U> objects.
        // NOTE(review): which unitigs go to which container is not visible here -- confirm.
        vector<Unitig<U>*> v_unitigs;
        KmerCovIndex<U> km_unitigs;
        h_kmers_ccov_t h_kmers_ccov;

        // Minimizer index, queried with the representative of a minimizer (see the inline find() above).
        MinimizerIndex hmap_min_unitigs;

        // Wrapper around the graph data of type G, accessed through getData().
        wrapperData<G> data;
};

#include "CompactedDBG.tcc"
#include "Search.tcc"
#include "IO.tcc"

#endif
