BALL  1.4.79
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
poseClustering.h
Go to the documentation of this file.
1 // -*- Mode: C++; tab-width: 2; -*-
2 // vi: set ts=2:
3 //
4 
5 #ifndef BALL_DOCKING_POSECLUSTERING_H
6 #define BALL_DOCKING_POSECLUSTERING_H
7 
8 #ifndef BALL_DATATYPE_OPTIONS_H
9 # include <BALL/DATATYPE/options.h>
10 #endif
11 
12 #ifndef BALL_DOCKING_COMMON_CONFORMATIONSET_H
14 #endif
15 
16 #ifndef BALL_MOLMEC_COMMON_SNAPSHOT_H
18 #endif
19 
20 #ifndef BALL_STRUCTURE_ATOMBIJECTION_H
22 #endif
23 
24 #ifndef BALL_KERNEL_SYSTEM_H
25 # include <BALL/KERNEL/system.h>
26 #endif
27 
28 #ifndef BALL_DATATYPE_STRING_H
29 # include <BALL/DATATYPE/string.h>
30 #endif
31 
32 #ifndef BALL_MATHS_VECTOR3_H
33 # include <BALL/MATHS/vector3.h>
34 #endif
35 
36 #ifndef BALL_MATHS_MATRIX44_H
37 # include <BALL/MATHS/matrix44.h>
38 #endif
39 
40 #include <Eigen/Core>
41 
42 #include <boost/shared_ptr.hpp>
43 #include <boost/variant.hpp>
44 #include <boost/graph/adjacency_list.hpp>
45 
46 #include <set>
47 #include <iostream>
48 
49 #ifdef BALL_HAS_TBB
50 # include <tbb/parallel_reduce.h>
51 # include <tbb/blocked_range.h>
52 #endif
53 
54 //#define POSECLUSTERING_DEBUG 1
55 #undef POSECLUSTERING_DEBUG
56 
57 namespace BALL
58 {
117  {
118  public:
122  struct BALL_EXPORT Option
124  {
127  static const String CLUSTER_METHOD;
128 
132 
136 
139  static const String RMSD_TYPE;
140 
143  static const String RUN_PARALLEL;
144 
145  };
146 
149  {
150  static const Index CLUSTER_METHOD;
151  static const float DISTANCE_THRESHOLD;
153  static const Index RMSD_TYPE;
154  static const bool RUN_PARALLEL;
155  static const bool USE_CENTER_OF_MASS_PRECLINK;
156  };
157 
158  enum BALL_EXPORT RMSDType
159  {
161  RIGID_RMSD,
162  CENTER_OF_MASS_DISTANCE
163  };
164 
165  enum BALL_EXPORT RMSDLevelOfDetail
166  {
167  C_ALPHA, //=0
168  HEAVY_ATOMS,
169  BACKBONE,
170  ALL_ATOMS,
171  PROPERTY_BASED_ATOM_BIJECTION
172  };
173 
174  enum BALL_EXPORT ClusterMethod
175  {
177  SLINK_SIBSON,
178  CLINK_DEFAYS,
180  CLINK_ALTHAUS
181  };
182 
184  {
185  public:
187 
188  RigidTransformation(Eigen::Vector3f const& new_trans, Eigen::Matrix3f const& new_rot)
189  : translation(new_trans),
190  rotation(new_rot)
191  {}
192 
193  Eigen::Vector3f translation;
194  Eigen::Matrix3f rotation;
195  };
196 
202  {
203  public:
204  PosePointer(RigidTransformation const* new_trafo, SnapShot const* new_snap = 0)
205  : trafo(new_trafo),
206  snap(new_snap)
207  { }
208 
209  PosePointer(SnapShot const* new_snap)
210  : trafo(0),
211  snap(new_snap)
212  { }
213 
215  SnapShot const* snap;
216  };
217 
219  {
220  public:
223 
224  ClusterProperties& operator=(const ClusterProperties&);
225 
226 #ifdef BALL_HAS_RVALUE_REFERENCES
228  ClusterProperties& operator=(ClusterProperties&&) noexcept;
229 #endif
230 
232  template <class Archive>
233  void serialize(Archive& ar, const unsigned int version);
234 
237  std::set<Index> poses;
238 
242 
251  boost::variant<Eigen::VectorXf, RigidTransformation> center;
252 
255  float merged_at;
256 #ifdef POSECLUSTERING_DEBUG
257 
259  float current_cluster_id;
260 #endif
261  };
262 
263  typedef boost::adjacency_list<boost::vecS,
264  boost::vecS,
265  boost::directedS,
267  boost::no_property,
268  unsigned int> ClusterTree;
269 
270  typedef ClusterTree::vertex_descriptor ClusterTreeNode;
271 
273 
276 
278  PoseClustering();
279 
282  PoseClustering(ConformationSet* poses, float rmsd);
283 
285  PoseClustering(System const& base_system, String transformation_file_name);
286 
288  virtual ~PoseClustering();
290 
291 
294 
297  bool compute();
298 
300 
304 
306  void setConformationSet(ConformationSet* new_set, bool precompute_atombijection = false);
307 
312  void setBaseSystemAndPoses(System const& base_system, std::vector<PosePointer> const& poses);
313 
316  // will be applied upon the current conformation
317  void setBaseSystemAndTransformations(System const& base_system, String transformation_file_name);
318 
320  const ConformationSet* getConformationSet() const {return current_set_;}
321 
323  ConformationSet* getConformationSet() {return current_set_;}
324 
326  const std::vector<RigidTransformation> & getRigidTransformations() const {return transformations_;}
327 
329  std::vector<Vector3> & getCentersOfMass() {return com_;}
330 
332  std::vector<Vector3> const & getCentersOfMass() const {return com_;}
333 
335  const System& getSystem() const;
336 
338  System& getSystem();
339 
341  Size getNumberOfPoses() const {return poses_.size();}
342 
344  Size getNumberOfClusters() const {return clusters_.size();}
345 
348  const std::set<Index>& getCluster(Index i) const;
349 
352  std::set<Index>& getCluster(Index i);
353 
355  Size getClusterSize(Index i) const;
356 
358  float getClusterScore(Index i) const;
359 
361  float getScore(const System sys_a, const System sys_b, Options options) const;
362 
364  AtomBijection& getAtomBijection() {return atom_bijection_;}
365 
367  AtomBijection const& getAtomBijection() const {return atom_bijection_;}
368 
370  void applyTransformation2System(Index i, System& target_system);
371 
373  void convertTransformations2Snaphots();
374 
376  void convertSnaphots2Transformations();
377 
379  float computeCompleteLinkageRMSD(Index i, Options options, bool initialize = true);
380 
382  //float computeCompleteLinkageRMSD(boost::shared_ptr<ConformationSet> cluster, Option options) const;
383 
385  boost::shared_ptr<System> getPose(Index i) const;
386 
388  std::vector<PosePointer> const& getPoses() const {return poses_;}
389 
391  boost::shared_ptr<System> getClusterRepresentative(Index i);
392 
394  Index findClusterRepresentative(Index i);
395 
397  boost::shared_ptr<ConformationSet> getClusterConformationSet(Index i);
398 
400  boost::shared_ptr<ConformationSet> getReducedConformationSet();
401 
415  bool refineClustering(Options const& refined_options);
416 
418 
422  Options options;
424 
427  void setDefaultOptions();
428 
430 
433 
439  static float getRigidRMSD(Eigen::Vector3f const& t_ab, Eigen::Matrix3f const& M_ab, Eigen::Matrix3f const& covariance_matrix);
440 
446  static float getSquaredRigidRMSD(Eigen::Vector3f const& t_ab, Eigen::Matrix3f const& M_ab, Eigen::Matrix3f const& covariance_matrix);
447 
450  static Eigen::Matrix3f computeCovarianceMatrix(System const& system, Index rmsd_level_of_detail = C_ALPHA);
451 
453 
456 
462  std::vector<std::set<Index> > extractClustersForThreshold(float threshold, Size min_size = 0);
463 
467  std::vector<std::set<Index> > extractNBestClusters(Size n);
468 
472  std::vector<std::set<Index> > filterClusters(Size min_size = 1);
473 
476  void serializeWardClusterTree(std::ostream& out, bool binary = false);
477 
480  void deserializeWardClusterTree(std::istream& in, bool binary = false);
481 
484  void exportWardClusterTreeToGraphViz(std::ostream& out);
485 
487 
488 
491  void printClusters(std::ostream& out = std::cout) const;
492 
495  void printClusterScores(std::ostream& out = std::cout);
496 
497 
498  protected:
499 
500 #ifdef BALL_HAS_TBB
501 
503  class ComputeNearestClusterTask_
504  {
505  public:
507  ComputeNearestClusterTask_(PoseClustering* parent,
508  const std::vector<ClusterTreeNode>& active_clusters,
509  Position current_cluster,
510  Index rmsd_type);
511 
513  ComputeNearestClusterTask_(ComputeNearestClusterTask_& cnct, tbb::split);
514 
516  void join(ComputeNearestClusterTask_ const& cnct);
517 
519  void operator() (const tbb::blocked_range<size_t>& r);
520 
522  Position getMinIndex() {return my_min_index_;}
523 
525  float getMinValue() {return my_min_value_;}
526 
527  protected:
528  // the PoseClustering instance that called us
529  PoseClustering* parent_;
530 
531  // the array we work on
532  const std::vector<ClusterTreeNode>& active_clusters_;
533 
534  // the cluster to compare to everything else
535  Position current_cluster_;
536 
537  // the kind of rmsd computation desired
538  Index rmsd_type_;
539 
540  // the minimum index in our own block
541  Position my_min_index_;
542 
543  // the minimum value in our own block
544  float my_min_value_;
545  };
546 #endif
547 
551  {
552  public:
553  ClusterTreeWriter_(ClusterTree const* cluster_tree)
554  : cluster_tree_(cluster_tree)
555  { }
556 
557  void operator() (std::ostream& out, const ClusterTreeNode& v) const;
558 
559  protected:
560  ClusterTree const* cluster_tree_;
561  };
562 
566  {
567  public:
568  ClusterTreeNodeComparator(ClusterTree& cluster_tree)
569  : cluster_tree_(&cluster_tree)
570  {}
571 
572  bool operator() (ClusterTreeNode const first, ClusterTreeNode const second) const
573  {
574  float first_value = (*cluster_tree_)[ first].merged_at;
575  float second_value = (*cluster_tree_)[second].merged_at;
576 
577  return first_value < second_value;
578  }
579 
580  protected:
581  ClusterTree* cluster_tree_;
582  };
583 
584  // trivial complete linkage implementation
585  // with O(n^2) space request
586  bool trivialCompute_();
587 
588  // space efficient (SLINK or CLINK) clustering
589  bool linearSpaceCompute_();
590 
591  //
592  bool althausCompute_();
593 
594  // implementation of a single linkage clustering as described in
595  // R. Sibson: SLINK: an optimally efficient algorithm for the single-link cluster method.
596  // The Computer Journal. 16, 1, British Computer Society, 1973, p. 30-34
597  void slinkInner_(int current_level);
598 
599  // implementation of a complete linkage clustering as described in
600  // D. Defays: An efficient algorithm for a complete link method.
601  // The Computer Journal. 20, 4, British Computer Society, 1977, p. 364-366.
602  void clinkInner_(int current_level);
603 
604  // implememtation of the nearest neighbor chain clustering algorithm
605  // as described in:
606  // Murtagh, Fionn (1983): "A survey of recent advances in hierarchical clustering algorithms",
607  // The Computer Journal 26 (4): 354–359
608  bool nearestNeighborChainCompute_();
609 
610  void initWardDistance_(Index rmsd_type);
611 
612  void updateWardDistance_(ClusterTreeNode parent, ClusterTreeNode i, ClusterTreeNode j, Index rmsd_type);
613 
614  float computeWardDistance_(ClusterTreeNode i, ClusterTreeNode j, Index rmsd_type);
615 
616  std::set<Index> collectClusterBelow_(ClusterTreeNode const& v);
617 
618  // compute the center of masses
619  void computeCenterOfMasses_();
620 
621  // precompute an atom bijection for faster access
622  void precomputeAtomBijection_();
623 
624  // check the given atom wrt choice of option RMSD_LEVEL_OF_DETAIL
625  bool static isExcludedByLevelOfDetail_(Atom const* atom, Index rmsd_level_of_detail);
626 
627  // distance between cluster i and cluster j
628  float getClusterRMSD_(Index i, Index j, Index rmsd_type);
629 
630  // reads the poses given as transformations from a file
631  // Note: the previously given system will be taken
632  // as untransformed reference, e.g. all transformations
633  // will be applied upon the current conformation
634  bool readTransformationsFromFile_(String filename);
635 
636  // compute the RMSD between two "poses"
637  float getRMSD_(Index i, Index j, Index rmsd_type);
638 
639  // store pointers to the snapshots in the poses vector
640  void storeSnapShotReferences_();
641 
642  //
643  void printCluster_(Index i, std::ostream& out = std::cout) const;
644 
645  //
646  void printVariables_(int a, int b, double c, int d, double e, int current_level);
647 
648  //
649  void clear_();
650 
651  // only used by trivial clustering
652  Eigen::MatrixXd pairwise_scores_;
653 
656 
658  std::vector< std::set<Index> > clusters_;
659 
660  std::vector< Index > cluster_representatives_;
661 
663  std::vector< float > cluster_scores_;
664 
667 
668 
669  // ----- unified access to the poses, independent of their type
670  // (the poses are either stored in transformations_, or in current_set_)
671  std::vector<PosePointer> poses_;
672 
673  // ----- data structure for transformation input (instead of snapshots)
674  std::vector<RigidTransformation> transformations_;
675 
676  Eigen::Matrix3f covariance_matrix_;
677 
678  // TODO: maybe use a const - ptr instead?
680 
681  // the reference state
683 
684  // flag indicating the use of transformation as input
686 
687  // do we need to delete the conformation set, that was
688  // created by converting transformations to snapshots
690 
691  // ------ data structures for slink and clink
692 
693  // stores the distance at which this indexed element has longer
694  // the largest index of its cluster
695  std::vector<double> lambda_;
696 
697  // the index of the cluster representative at merge-time
698  // (element with largest index)
699  std::vector<int> pi_;
700 
701  std::vector<double> mu_;
702 
703 
704  // ----- data structures for nearest neighbor chain ward
706 
707  // ----- data structure for CENTER_OF_GRAVITY_CLINK
708 
709  // the geometric center of mass
710  std::vector<Vector3> com_;
711 
712  // ----- general data structures
713 
714  // We cache the atom bijection for faster
715  // RMSD computation; this is possible, since the system topology does
716  // not change
718 
719  // helper dummies to speed up snapshot application
722 
724  ClusterTree cluster_tree_;
725  }; //class PoseClustering
726 } //namesspace BALL
727 
728 #endif // BALL_DOCKING_POSECLUSTERING_H
#define BALL_CREATE(name)
Definition: create.h:62
std::vector< double > mu_
std::vector< double > lambda_
static const String RMSD_TYPE
std::vector< Vector3 > & getCentersOfMass()
returns the centers of mass-vector (non-empty only for CENTER_OF_MASS_DISTANCE)
static const Index RMSD_TYPE
std::vector< PosePointer > const & getPoses() const
returns poses as PosePointer
RIGID_RMSD
static const bool RUN_PARALLEL
const std::vector< RigidTransformation > & getRigidTransformations() const
returns the poses as rigid transformations
CLINK_DEFAYS
std::vector< std::set< Index > > clusters_
the clusters: sets of pose indices
std::vector< Vector3 > com_
static const String CLUSTER_METHOD
RigidTransformation(Eigen::Vector3f const &new_trans, Eigen::Matrix3f const &new_rot)
static const String RUN_PARALLEL
boost::adjacency_list< boost::vecS, boost::vecS, boost::directedS, ClusterProperties, boost::no_property, unsigned int > ClusterTree
TRIVIAL_COMPLETE_LINKAGE
Eigen::Matrix3f covariance_matrix_
static const float DISTANCE_THRESHOLD
std::vector< int > pi_
SLINK_SIBSON
const ConformationSet * getConformationSet() const
returns the poses to be clustered as ConformationSet
ClusterTreeWriter_(ClusterTree const *cluster_tree)
ClusterTreeNodeComparator(ClusterTree &cluster_tree)
Index rmsd_level_of_detail_
the RMSD definition used for clustering
RigidTransformation const * trafo
HEAVY_ATOMS
PosePointer(RigidTransformation const *new_trafo, SnapShot const *new_snap=0)
BALL_EXTERN_VARIABLE const double c
Definition: constants.h:149
static const Index CLUSTER_METHOD
std::vector< Index > cluster_representatives_
BACKBONE
boost::variant< Eigen::VectorXf, RigidTransformation > center
AtomBijection const & getAtomBijection() const
returns a const reference to the cached AtomBijection
Eigen::MatrixXd pairwise_scores_
Computation of clusters of docking poses.
ConformationSet * current_set_
the ConformationSet we wish to cluster
static const String DISTANCE_THRESHOLD
Size getNumberOfClusters() const
returns the number of clusters found
Size getNumberOfPoses() const
returns the number of poses
static const String RMSD_LEVEL_OF_DETAIL
SNAPSHOT_RMSD
PosePointer(SnapShot const *new_snap)
ConformationSet * getConformationSet()
returns the poses to be clustered as ConformationSet
std::vector< Vector3 > const & getCentersOfMass() const
returns the centers of mass-vector, const version (non-empty only for CENTER_OF_MASS_DISTANCE) ...
std::vector< float > cluster_scores_
the scores of the clusters
-*- Mode: C++; tab-width: 2; -*-
Definition: constants.h:12
std::vector< PosePointer > poses_
static const Index RMSD_LEVEL_OF_DETAIL
static const bool USE_CENTER_OF_MASS_PRECLINK
C_ALPHA
ClusterTree cluster_tree_
The tree built during hierarchical clustering.
ALL_ATOMS
std::vector< RigidTransformation > transformations_
AtomBijection & getAtomBijection()
returns a reference to the cached AtomBijection
BALL_INDEX_TYPE Index
Default values for options.
NEAREST_NEIGHBOR_CHAIN_WARD
ClusterTree::vertex_descriptor ClusterTreeNode
#define BALL_EXPORT
Definition: COMMON/global.h:50
AtomBijection atom_bijection_
BALL_SIZE_TYPE Position