/* * Extractors.hpp * * Created on: 15.10.2012 * Author: heber */ #ifndef TRAININGDATA_EXTRACTORS_HPP_ #define TRAININGDATA_EXTRACTORS_HPP_ // include config.h #ifdef HAVE_CONFIG_H #include #endif #include #include "Fragmentation/Summation/SetValues/Fragment.hpp" #include "FunctionApproximation/FunctionModel.hpp" class Fragment; /** Namespace containing all simple extractor functions. * * Extractor functions extract distances from a given fragment matching with * a given set of particle types (i.e. elements, e.h. H2O). * Filter functions extract a subset of distances from a given set of distances * to be used with a specific model. * * To this end, each FunctionModel has both a filter and an extractor function. * * The functions in this namespace act as helpers or basic building blocks in * constructing such filters and extractors. * */ namespace Extractors { typedef Fragment::charges_t::const_iterator chargeiter_t; typedef std::vector chargeiters_t; typedef size_t count_t; typedef Fragment::charge_t element_t; typedef std::map< element_t, count_t> elementcounts_t; typedef std::map< element_t, chargeiters_t > elementtargets_t; typedef std::vector< chargeiters_t > targets_per_combination_t; //!> typedef for particle designation typedef int ParticleType_t; //!> typedef for a vector of particle designations typedef std::vector ParticleTypes_t; /** Namespace for some internal helper functions. * */ namespace _detail { /** Gather all distance arguments from the same aligned vector of charges. * * Basically, we filter the positions indicated by the targets but * from a different vector that has the same layout. * * \param positions all nuclei positions * \param charges all nuclei charges * \param targets iterators on charges * \return filtered distance arguments */ FunctionModel::arguments_t gatherDistancesFromTargets( const Fragment::positions_t& positions, const Fragment::charges_t& charges, const chargeiters_t &targets, const size_t globalid ); /** Gather all positions from the same aligned vector of charges. * * Basically, we filter the positions indicated by the targets but * from a different vector that has the same layout. * * \param positions all nuclei positions * \param charges all nuclei charges * \param targets iterators on charges * \return filtered positions */ Fragment::positions_t gatherPositionsFromTargets( const Fragment::positions_t& positions, const Fragment::charges_t& charges, const chargeiters_t& targets ); /** Counts all same elements in the vector and places into map of elements. * * \param elements vector of elements * \return count of same element in vector */ elementcounts_t getElementCounts( const Fragment::charges_t elements ); /** Gather iterators to the elements related to the desired elementcounts. * * \param charges charges wherein to search for the elements * \param elementcounts number of desired hits per element * \return iterators equal to the initial vector of elements */ elementtargets_t convertElementcountsToTargets( const Fragment::charges_t &charges, const elementcounts_t &elementcounts ); /** Convert the alignment back to as it was in the original vector. * * We lost the information by storing it in a map. Hence, we need this * final step. * * \param elementtargets targets as they are in the map \a elementcounts * \param elements the original order of the elements * \param elementcounts the count per element for debugging checks * \return vector of targets in the order as they are in \a element */ chargeiters_t realignElementtargets( const elementtargets_t &elementtargets, const Fragment::charges_t elements, const elementcounts_t &elementcounts ); /** Searches for desired elements in charges in a unique manner. * * The idea is to have e.g. a fragment with charges 8,1,1,2 and * elements as 1,8,1 (e.g. for an angle HOH) and we get the * chargeiters in the desired manner on indices: 1,0,3. * * \param charges charges to look through * \param elements vector of elements to find */ chargeiters_t gatherTargetsFromFragment( const Fragment::charges_t& charges, const Fragment::charges_t elements ); /** Brings all charges together in a map. * * @param charges charges as possible keys and their iterators as values in the map * @param elements list of charges to pick as keys * @return map of key and a vector of charge iterators */ elementtargets_t convertChargesToTargetMap( const Fragment::charges_t& charges, Fragment::charges_t elements ); /** Brings combinatorially together desired list of \a charges and \a targets. * * @param charges list of desired charges * @param elementtargets map of available targets per element * @return vector of chargeiters with all unique combinations */ targets_per_combination_t CombineChargesAndTargets( const Fragment::charges_t& charges, const elementtargets_t& elementtargets ); /** Recursive function to pick the next target. * * This is used by \sa CombineChargesAndTargets() * * @param charges set of charges, reduced by one per recursion * @param elementtargets targets, map of targets to pick from * @param currenttargets current set of targets, "global" through recursion * @param addFunction bound function to add a set when complete */ void pickLastElementAsTarget( Fragment::charges_t elements, elementtargets_t elementtargets, chargeiters_t& currenttargets, boost::function &addFunction ); /** Converts a list of chargeiters to a list of respective arguments. * * @param positions positions from fragment * @param charges charges associated to each element in \a positions * @param combinations vector of chargeiters * \param globalid refers to the index within the global set of configurations * @return list of arguments */ FunctionModel::arguments_t convertTargetsToArguments( const Fragment::positions_t& positions, const Fragment::charges_t& charges, const targets_per_combination_t combinations, const size_t globalid ); } /** Gather all distances from a given set of positions. * * \param positions all nuclei positions * \param charges all nuclei charges * \param globalid index to associated in argument_t with * \return vector of argument_ , each with a distance */ FunctionModel::arguments_t gatherAllDistanceArguments( const Fragment::positions_t& positions, const Fragment::charges_t& charges, const size_t globalid); /** Gather all distances from a given set of positions. * * Here, we only return one of the two equal distances. * * \param positions all nuclei positions * \param charges all nuclei charges * \param globalid index to associated in argument_t with * \return vector of argument_ , each with a distance */ FunctionModel::arguments_t gatherAllSymmetricDistanceArguments( const Fragment::positions_t& positions, const Fragment::charges_t& charges, const size_t globalid); /** Simple extractor of all unique pair distances of a given \a fragment. * * \param positions all nuclei positions * \param charges all nuclei charges * \param index index refers to the index within the global set of configurations * \return vector of of argument_t containing all found distances */ inline FunctionModel::arguments_t gatherAllDistances( const Fragment::positions_t& positions, const Fragment::charges_t& charges, const size_t index ) { // get distance out of Fragment return gatherAllDistanceArguments(positions, charges, index); } /** Simple extractor of all unique pair distances of a given \a fragment, where * the first index is less than the second one. * * \param positions all nuclei positions * \param charges all nuclei charges * \param index index refers to the index within the global set of configurations * \return vector of of argument_t containing all found distances */ inline FunctionModel::arguments_t gatherAllSymmetricDistances( const Fragment::positions_t& positions, const Fragment::charges_t& charges, const size_t index ) { // get distance out of Fragment return gatherAllSymmetricDistanceArguments(positions, charges, index); } /** Filters only those positions out of given \a fragment that match \a elements. * * \param positions all nuclei positions * \param charges all nuclei charges * \param elements tuple of desired elements * \return vector of positions_t containing */ Fragment::positions_t gatherPositionsFromFragment( const Fragment::positions_t positions, const Fragment::charges_t charges, const Fragment::charges_t& elements ); /** Filters only those distances out of given \a fragment that match \a elements. * * \param positions all nuclei positions * \param charges all nuclei charges * \param elements tuple of desired elements * \param globalid refers to the index within the global set of configurations * \return vector of arguments_t containing those matched with elements */ FunctionModel::arguments_t gatherDistancesFromFragment( const Fragment::positions_t positions, const Fragment::charges_t charges, const Fragment::charges_t& elements, const size_t globalid ); /** Gather all combinations of charges as distance arguments from the fragment. * * E.g. we have a water fragment, i.e. (8,1,1) and we we want elements (8,1), * then two arguments are returned, first to second and first to third. * * With \sa gatherDistancesFromFragment() only the first distance would be * returned. * * @param positions positions in fragment * @param charges charges in fragment * @param elements list of desired elements * @param globalid some global id to discern training data tuples * @return list of arguments with distances */ FunctionModel::arguments_t gatherAllDistancesFromFragment( const Fragment::positions_t& positions, const Fragment::charges_t& charges, const Fragment::charges_t elements, const size_t globalid ); /** Reorder arguments by increasing distance. * * \param listargs list of arguments to reorder each * \return reordered args */ FunctionModel::list_of_arguments_t reorderArgumentsByIncreasingDistance( const FunctionModel::list_of_arguments_t &listargs ); /** Reorder the arguments to bring adjacent ones together. * * After filtering via particle types arguments related via same indices * must not necessarily be contained in the same bunch. This reordering * is done here, preserving the alignment given in * \sa filterArgumentsByParticleTypes() * * \param listargs list of arguments to reorder each * \param _types particle type vector * \return reordered args */ FunctionModel::list_of_arguments_t reorderArgumentsByParticleTypes( const FunctionModel::list_of_arguments_t &eachargs, const ParticleTypes_t &_types ); /** Filter arguments according to types, allowing multiples. * * If particle types is (0,1,2) and three arguments, each with a pair of types, * are given, then the alignment will be: (0,1), (0,2), and (1,2). * * \param args arguments to reorder * \param _types particle type vector * \return filtered list of args */ FunctionModel::list_of_arguments_t filterArgumentsByParticleTypes( const FunctionModel::arguments_t &args, const ParticleTypes_t &_types ); /** Combines two argument lists by sorting and making unique. * * @param firstargs first list of arguments * @param secondargs second list of arguments * @return concatenated lists */ FunctionModel::arguments_t combineArguments( const FunctionModel::arguments_t &firstargs, const FunctionModel::arguments_t &secondargs); /** Combines two argument lists by concatenation. * * @param firstargs first list of arguments * @param secondargs second list of arguments * @return concatenated lists */ FunctionModel::arguments_t concatenateArguments( const FunctionModel::arguments_t &firstargs, const FunctionModel::arguments_t &secondargs); /** Combines two argument lists by concatenation. * * @param firstlistargs first list of argument tuples * @param secondlistargs second list of argument tuples * @return concatenated lists */ FunctionModel::list_of_arguments_t concatenateListOfArguments( const FunctionModel::list_of_arguments_t &firstlistargs, const FunctionModel::list_of_arguments_t &secondlistargs); }; /* namespace Extractors */ #endif /* TRAININGDATA_EXTRACTORS_HPP_ */