-
Notifications
You must be signed in to change notification settings - Fork 0
/
HPFPotentialPair.h
956 lines (829 loc) · 37.8 KB
/
HPFPotentialPair.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
// Copyright (c) 2009-2022 The Regents of the University of Michigan.
// Part of HOOMD-blue, released under the BSD 3-Clause License.
#ifndef __HPF_POTENTIAL_PAIR_H__
#define __HPF_POTENTIAL_PAIR_H__
#include <iostream>
#include <memory>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <stdexcept>
#include "hoomd/ForceCompute.h"
// #include "hoomd/GSDShapeSpecWriter.h"
#include "hoomd/GlobalArray.h"
#include "hoomd/HOOMDMath.h"
#include "hoomd/Index1D.h"
#include "hoomd/managed_allocator.h"
#include "hoomd/md/EvaluatorPairLJ.h"
#include "hoomd/md/NeighborList.h"
#ifdef ENABLE_HIP
#include <hip/hip_runtime.h>
#endif
#ifdef ENABLE_MPI
#include "hoomd/Communicator.h"
#endif
/*! \file HPFPotentialPair.h
\brief Defines the template class for the hard-particle frictional
interaction \note This header cannot be compiled by nvcc
*/
#ifdef __HIPCC__
#error This header cannot be compiled by nvcc
#endif
namespace hoomd
{
namespace md
{
//! Hash function for std::pair
struct pair_hash
{
template<class T1, class T2> size_t operator()(const std::pair<T1, T2>& pair) const
{
return std::hash<T1>()(pair.first) ^ std::hash<T2>()(pair.second);
}
};
//! Template class for computing pair potentials
/*! <b>Overview:</b>
HPFPotentialPair computes standard pair potentials (and forces)
between all particle pairs in the simulation. It employs the use of a
neighbor list to limit the number of computations done to only those
particles with the cutoff radius of each other. The computation of
the actual V(r) is not performed directly by this class, but by an
evaluator class (e.g. EvaluatorPairSpring) which is passed in as a
template parameter so the computations are performed as efficiently
as possible.
<b>Implementation details</b>
rcutsq, and the params are stored per particle type pair. It wastes
a little bit of space, but benchmarks show that storing the symmetric
type pairs and indexing with Index2D is faster than not storing
redundant pairs and indexing with Index2DUpperTriangular. All of
these values are stored in GlobalArray for easy access on the GPU by
a derived class. The type of the parameters is defined by \a
param_type in the potential evaluator class passed in. See the
appropriate documentation for the evaluator for the definition of
each element of the parameters.
*/
template<class evaluator> class HPFPotentialPair : public ForceCompute
{
public:
//! Param type from evaluator
typedef typename evaluator::param_type param_type;
//! Construct the pair potential
HPFPotentialPair(std::shared_ptr<SystemDefinition> sysdef,
std::shared_ptr<NeighborList> nlist,
Scalar mus,
Scalar mur,
Scalar ks,
Scalar kr);
//! Destructor
virtual ~HPFPotentialPair();
//! Set and get the pair parameters for a single type pair
virtual void setParams(unsigned int typ1, unsigned int typ2, const param_type& param);
virtual void setParamsPython(pybind11::tuple typ, pybind11::dict params);
/// Get params for a single type pair using a tuple of strings
virtual pybind11::dict getParams(pybind11::tuple typ);
//! Set the rcut for a single type pair
virtual void setRcut(unsigned int typ1, unsigned int typ2, Scalar rcut);
/// Get the r_cut for a single type pair
Scalar getRCut(pybind11::tuple types);
/// Set the rcut for a single type pair using a tuple of strings
virtual void setRCutPython(pybind11::tuple types, Scalar r_cut);
//! Method that is called whenever the GSD file is written if
//! connected to a GSD file.
int slotWriteGSDShapeSpec(gsd_handle&) const;
/// Validate that types are within Ntypes
void validateTypes(unsigned int typ1, unsigned int typ2, std::string action);
//! Method that is called to connect to the gsd write state signal
void connectGSDShapeSpec(std::shared_ptr<GSDDumpWriter> writer);
// It might be better to just remove these shift methods, since we
// don't plan to use them with this force compute
//! Shifting modes that can be applied to the energy
enum energyShiftMode
{
no_shift = 0
};
//! Set the mode to use for shifting the energy
void setShiftMode(energyShiftMode mode)
{
m_shift_mode = mode;
}
void setShiftModePython(std::string mode)
{
if (mode == "none")
{
m_shift_mode = no_shift;
}
else
{
throw std::runtime_error("Invalid energy shift mode.");
}
}
/// Get the mode used for the energy shifting
std::string getShiftMode()
{
switch (m_shift_mode)
{
case no_shift:
return "none";
default:
throw std::runtime_error("Error setting shift mode.");
}
}
void getHIShearRate(vec3<Scalar>& shear_rate) const
{
shear_rate = vec3<Scalar>(m_hi_shear_rate);
}
void setHIShearRate(vec3<Scalar> shear_rate)
{
m_hi_shear_rate = vec_to_scalar3(shear_rate);
// m_hi_vorticity = vec_to_scalar3(shear_rate); // This is wrong
}
void clearDynamicState()
{
m_dynamic_state_flag = false;
m_xi.clear();
m_psi.clear();
m_pair_idx.clear();
}
virtual void notifyDetach()
{
if (m_attached)
{
m_nlist->removeRCutMatrix(m_r_cut_nlist);
}
if (!m_persist_state_on_detach)
{
clearDynamicState();
}
m_attached = false;
}
#ifdef ENABLE_MPI
//! Get ghost particle fields requested by this pair potential
virtual CommFlags getRequestedCommFlags(uint64_t timestep);
#endif
std::vector<std::string> getTypeShapeMapping() const
{
std::vector<std::string> type_shape_mapping(m_pdata->getNTypes());
for (unsigned int i = 0; i < type_shape_mapping.size(); i++)
{
evaluator eval(Scalar(0.0), Scalar(0.0), this->m_params[m_typpair_idx(i, i)]);
type_shape_mapping[i] = eval.getShapeSpec();
}
return type_shape_mapping;
}
bool m_log_pair_info = false;
bool m_hi = false; //!< Control whether to compute hydrodynamic interactions
Scalar3 m_hi_shear_rate = make_scalar3(0.0, 0.0, 0.0); //!< Shear rate for hydrodynamic interactions
Scalar3 m_hi_vorticity = make_scalar3(0.0, 0.0, 0.0); //!< Shear rate for hydrodynamic interactions
Scalar m_gamma = Scalar(0.0);
protected:
std::shared_ptr<NeighborList> m_nlist; //!< The neighborlist to use for the computation
energyShiftMode m_shift_mode; //!< Store the mode with which to
//!< handle the energy shift at r_cut
Index2D m_typpair_idx; //!< Helper class for indexing per type pair
//!< arrays
GlobalArray<Scalar> m_rcutsq; //!< Cutoff radius squared per type pair
/// Per type pair potential parameters
std::vector<param_type, hoomd::detail::managed_allocator<param_type>> m_params;
/// Track whether we have attached to the Simulation object
bool m_attached = true;
// THE FOUR VARIABLES BELOW ARE STATEFUL! WE NEED TO ENSURE THEY
// HAVE SENSIBLE VALUES AT ALL TIMES! Be careful to engage/disengage
// the dynamic state flag when the force compute is added/removed
// from the system.
bool m_dynamic_state_flag = false;
bool m_persist_state_on_detach = false;
// Dynamically track quantities relevant to contact friction
// Angular momentum quaternion needs to be converted to real space
// frame vector for these computations
std::vector<Scalar3, hoomd::detail::managed_allocator<Scalar3>>
m_xi; //!< transverse surface velocity integrated
std::vector<Scalar3, hoomd::detail::managed_allocator<Scalar3>>
m_psi; //!< rotational surface velocity integrated
/// Maps pairs of particles to indices in the surface velocities
/// This will be necessary to resort the arrays after neighborlist
/// updates
std::unordered_map<std::pair<unsigned int, unsigned int>, unsigned int, pair_hash> m_pair_idx;
// Might replace the above variable with a list
std::unordered_map<unsigned int, Scalar3>
m_w_cache; //!< Cache for the angular velocity of each particle
/// r_cut (not squared) given to the neighbor list
std::shared_ptr<GlobalArray<Scalar>> m_r_cut_nlist;
/// Keep track of number of each type of particle
std::vector<unsigned int> m_num_particles_by_type;
Scalar m_mus = Scalar(0.0); //!< Sliding friction coefficient
Scalar m_mur = Scalar(0.0); //!< Rolling friction coefficient
Scalar m_ks = Scalar(10.0); //!< Sliding friction spring constant
Scalar m_kr = Scalar(10.0); //!< Rolling friction spring constant
#ifdef ENABLE_MPI
/// The system's communicator.
std::shared_ptr<Communicator> m_comm;
#endif
//! Actually compute the forces
virtual void computeForces(uint64_t timestep);
}; // end class HPFPotentialPair
/*! \param sysdef System to compute forces on
\param nlist Neighborlist to use for computing the forces
*/
template<class evaluator>
HPFPotentialPair<evaluator>::HPFPotentialPair(std::shared_ptr<SystemDefinition> sysdef,
std::shared_ptr<NeighborList> nlist,
Scalar mus,
Scalar mur,
Scalar ks,
Scalar kr)
: ForceCompute(sysdef), m_nlist(nlist), m_shift_mode(no_shift),
m_typpair_idx(m_pdata->getNTypes()), m_mus(mus), m_mur(mur), m_ks(ks), m_kr(kr)
{
m_exec_conf->msg->notice(5) << "Constructing HPFPotentialPair<" << evaluator::getName() << ">"
<< std::endl;
assert(m_pdata);
assert(m_nlist);
// initialize empty containers for xi, psi, pair_idx, and w_cache
std::vector<Scalar3, hoomd::detail::managed_allocator<Scalar3>> xi;
std::vector<Scalar3, hoomd::detail::managed_allocator<Scalar3>> psi;
std::unordered_map<std::pair<unsigned int, unsigned int>, unsigned int, pair_hash> pair_idx;
std::unordered_map<unsigned int, Scalar3> w_cache;
auto num_nlist_elements = 0;
xi.reserve(num_nlist_elements);
psi.reserve(num_nlist_elements);
pair_idx.reserve(m_pair_idx.max_load_factor() * num_nlist_elements);
w_cache.reserve(w_cache.max_load_factor() * num_nlist_elements);
m_xi.swap(xi);
m_psi.swap(psi);
m_pair_idx.swap(pair_idx);
m_w_cache.swap(w_cache);
GlobalArray<Scalar> rcutsq(m_typpair_idx.getNumElements(), m_exec_conf);
m_rcutsq.swap(rcutsq);
m_params = std::vector<param_type, hoomd::detail::managed_allocator<param_type>>(
m_typpair_idx.getNumElements(),
param_type(),
hoomd::detail::managed_allocator<param_type>(m_exec_conf->isCUDAEnabled()));
m_r_cut_nlist
= std::make_shared<GlobalArray<Scalar>>(m_typpair_idx.getNumElements(), m_exec_conf);
nlist->addRCutMatrix(m_r_cut_nlist);
#if defined(ENABLE_HIP) && defined(__HIP_PLATFORM_NVCC__)
if (m_pdata->getExecConf()->isCUDAEnabled())
{
// m_params is _always_ in unified memory, so memadvise and
// prefetch
cudaMemAdvise(m_params.data(),
m_params.size() * sizeof(param_type),
cudaMemAdviseSetReadMostly,
0);
auto& gpu_map = m_exec_conf->getGPUIds();
for (unsigned int idev = 0; idev < m_exec_conf->getNumActiveGPUs(); ++idev)
{
cudaMemPrefetchAsync(m_params.data(),
sizeof(param_type) * m_params.size(),
gpu_map[idev]);
}
// m_rcutsq and m_ronsq only in unified memory if
// allConcurrentManagedAccess
if (m_exec_conf->allConcurrentManagedAccess())
{
cudaMemAdvise(m_rcutsq.get(),
m_rcutsq.getNumElements() * sizeof(Scalar),
cudaMemAdviseSetReadMostly,
0);
for (unsigned int idev = 0; idev < m_exec_conf->getNumActiveGPUs(); ++idev)
{
// prefetch data on all GPUs
cudaMemPrefetchAsync(m_rcutsq.get(),
sizeof(Scalar) * m_rcutsq.getNumElements(),
gpu_map[idev]);
}
}
}
#endif
// get number of each type of particle, needed for energy and
// pressure correction
m_num_particles_by_type.resize(m_pdata->getNTypes());
std::fill(m_num_particles_by_type.begin(), m_num_particles_by_type.end(), 0);
ArrayHandle<Scalar4> h_postype(m_pdata->getPositions(),
access_location::host,
access_mode::read);
for (unsigned int i = 0; i < m_pdata->getN(); i++)
{
unsigned int typeid_i = __scalar_as_int(h_postype.data[i].w);
m_num_particles_by_type[typeid_i] += 1;
}
#ifdef ENABLE_MPI
if (m_sysdef->isDomainDecomposed())
{
// reduce number of each type of particle on all processors
MPI_Allreduce(MPI_IN_PLACE,
m_num_particles_by_type.data(),
m_pdata->getNTypes(),
MPI_UNSIGNED,
MPI_SUM,
m_exec_conf->getMPICommunicator());
}
#endif
#ifdef ENABLE_MPI
if (m_sysdef->isDomainDecomposed())
{
auto comm_weak = m_sysdef->getCommunicator();
assert(comm_weak.lock());
m_comm = comm_weak.lock();
}
#endif
}
template<class evaluator> HPFPotentialPair<evaluator>::~HPFPotentialPair()
{
m_exec_conf->msg->notice(5) << "Destroying HPFPotentialPair<" << evaluator::getName() << ">"
<< std::endl;
if (m_attached)
{
m_nlist->removeRCutMatrix(m_r_cut_nlist);
}
}
/*! \param typ1 First type index in the pair
\param typ2 Second type index in the pair
\param param Parameter to set
\note When setting the value for (\a typ1, \a typ2), the parameter
for (\a typ2, \a typ1) is automatically set.
*/
template<class evaluator>
void HPFPotentialPair<evaluator>::setParams(unsigned int typ1,
unsigned int typ2,
const param_type& param)
{
validateTypes(typ1, typ2, "setting params");
m_params[m_typpair_idx(typ1, typ2)] = param;
m_params[m_typpair_idx(typ2, typ1)] = param;
}
template<class evaluator>
void HPFPotentialPair<evaluator>::setParamsPython(pybind11::tuple typ, pybind11::dict params)
{
auto typ1 = m_pdata->getTypeByName(typ[0].cast<std::string>());
auto typ2 = m_pdata->getTypeByName(typ[1].cast<std::string>());
setParams(typ1, typ2, param_type(params, m_exec_conf->isCUDAEnabled()));
}
template<class evaluator> pybind11::dict HPFPotentialPair<evaluator>::getParams(pybind11::tuple typ)
{
auto typ1 = m_pdata->getTypeByName(typ[0].cast<std::string>());
auto typ2 = m_pdata->getTypeByName(typ[1].cast<std::string>());
validateTypes(typ1, typ2, "setting params");
return m_params[m_typpair_idx(typ1, typ2)].asDict();
}
template<class evaluator>
void HPFPotentialPair<evaluator>::validateTypes(unsigned int typ1,
unsigned int typ2,
std::string action)
{
auto n_types = this->m_pdata->getNTypes();
if (typ1 >= n_types || typ2 >= n_types)
{
throw std::runtime_error("Error in" + action + " for pair potential. Invalid type");
}
}
/*! \param typ1 First type index in the pair
\param typ2 Second type index in the pair
\param rcut Cutoff radius to set
\note When setting the value for (\a typ1, \a typ2), the parameter
for (\a typ2, \a typ1) is automatically set.
*/
template<class evaluator>
void HPFPotentialPair<evaluator>::setRcut(unsigned int typ1, unsigned int typ2, Scalar rcut)
{
validateTypes(typ1, typ2, "setting r_cut");
{
// store r_cut**2 for use internally
ArrayHandle<Scalar> h_rcutsq(m_rcutsq, access_location::host, access_mode::readwrite);
h_rcutsq.data[m_typpair_idx(typ1, typ2)] = rcut * rcut;
h_rcutsq.data[m_typpair_idx(typ2, typ1)] = rcut * rcut;
// store r_cut unmodified for so the neighbor list knows what
// particles to include
ArrayHandle<Scalar> h_r_cut_nlist(*m_r_cut_nlist,
access_location::host,
access_mode::readwrite);
h_r_cut_nlist.data[m_typpair_idx(typ1, typ2)] = rcut;
h_r_cut_nlist.data[m_typpair_idx(typ2, typ1)] = rcut;
}
// notify the neighbor list that we have changed r_cut values
m_nlist->notifyRCutMatrixChange();
}
template<class evaluator>
void HPFPotentialPair<evaluator>::setRCutPython(pybind11::tuple types, Scalar r_cut)
{
auto typ1 = m_pdata->getTypeByName(types[0].cast<std::string>());
auto typ2 = m_pdata->getTypeByName(types[1].cast<std::string>());
setRcut(typ1, typ2, r_cut);
}
template<class evaluator> Scalar HPFPotentialPair<evaluator>::getRCut(pybind11::tuple types)
{
auto typ1 = m_pdata->getTypeByName(types[0].cast<std::string>());
auto typ2 = m_pdata->getTypeByName(types[1].cast<std::string>());
validateTypes(typ1, typ2, "getting r_cut.");
ArrayHandle<Scalar> h_rcutsq(m_rcutsq, access_location::host, access_mode::read);
return sqrt(h_rcutsq.data[m_typpair_idx(typ1, typ2)]);
}
template<class evaluator>
void HPFPotentialPair<evaluator>::connectGSDShapeSpec(std::shared_ptr<GSDDumpWriter> writer)
{
typedef hoomd::detail::SharedSignalSlot<int(gsd_handle&)> SlotType;
auto func = std::bind(&HPFPotentialPair<evaluator>::slotWriteGSDShapeSpec,
this,
std::placeholders::_1);
std::shared_ptr<hoomd::detail::SignalSlot> pslot(new SlotType(writer->getWriteSignal(), func));
addSlot(pslot);
}
template<class evaluator>
int HPFPotentialPair<evaluator>::slotWriteGSDShapeSpec(gsd_handle& handle) const
{
hoomd::detail::GSDShapeSpecWriter shapespec(m_exec_conf);
m_exec_conf->msg->notice(10) << "HPFPotentialPair writing to GSD File to name: "
<< shapespec.getName() << std::endl;
int retval = shapespec.write(handle, this->getTypeShapeMapping());
return retval;
}
/*! \post The pair forces are computed for the given timestep. The
neighborlist's compute method is called to ensure that it is up to
date before proceeding.
\param timestep specifies the current time step of the simulation
*/
template<class evaluator> void HPFPotentialPair<evaluator>::computeForces(uint64_t timestep)
{
// start by updating the neighborlist
m_nlist->compute(timestep);
bool nlist_updated = m_nlist->hasBeenUpdated(timestep);
// depending on the neighborlist settings, we can take advantage of
// newton's third law to reduce computations at the cost of memory
// access complexity: set that flag now
bool third_law = m_nlist->getStorageMode() == NeighborList::half;
// access the neighbor list, particle data, and system box
ArrayHandle<unsigned int> h_n_neigh(m_nlist->getNNeighArray(),
access_location::host,
access_mode::read);
ArrayHandle<unsigned int> h_nlist(m_nlist->getNListArray(),
access_location::host,
access_mode::read);
ArrayHandle<size_t> h_head_list(m_nlist->getHeadList(),
access_location::host,
access_mode::read);
ArrayHandle<Scalar4> h_pos(m_pdata->getPositions(), access_location::host, access_mode::read);
ArrayHandle<Scalar4> h_vel(m_pdata->getVelocities(), access_location::host, access_mode::read);
ArrayHandle<Scalar> h_diameter(m_pdata->getDiameters(),
access_location::host,
access_mode::read);
ArrayHandle<Scalar> h_charge(m_pdata->getCharges(), access_location::host, access_mode::read);
ArrayHandle<Scalar4> h_orientation(m_pdata->getOrientationArray(),
access_location::host,
access_mode::read);
ArrayHandle<Scalar4> h_angmom(m_pdata->getAngularMomentumArray(),
access_location::host,
access_mode::read);
ArrayHandle<Scalar3> h_inertia(m_pdata->getMomentsOfInertiaArray(),
access_location::host,
access_mode::read);
ArrayHandle<unsigned int> h_tag(m_pdata->getTags(), access_location::host, access_mode::read);
// force arrays
ArrayHandle<Scalar4> h_force(m_force, access_location::host, access_mode::overwrite);
ArrayHandle<Scalar4> h_torque(m_torque, access_location::host, access_mode::overwrite);
ArrayHandle<Scalar> h_virial(m_virial, access_location::host, access_mode::overwrite);
const BoxDim box = m_pdata->getGlobalBox();
ArrayHandle<Scalar> h_rcutsq(m_rcutsq, access_location::host, access_mode::read);
PDataFlags flags = this->m_pdata->getFlags();
bool compute_virial = flags[pdata_flag::pressure_tensor];
// need to start from a zero force, energy and virial
memset((void*)h_force.data, 0, sizeof(Scalar4) * m_force.getNumElements());
memset((void*)h_force.data, 0, sizeof(Scalar4) * m_torque.getNumElements());
memset((void*)h_virial.data, 0, sizeof(Scalar) * m_virial.getNumElements());
// let's handle the startup and rebuild case
if (!m_dynamic_state_flag || nlist_updated)
{
std::cout << "Rebuild nlist: " << timestep << std::endl;
m_dynamic_state_flag = true;
std::vector<Scalar3, hoomd::detail::managed_allocator<Scalar3>> xi;
std::vector<Scalar3, hoomd::detail::managed_allocator<Scalar3>> psi;
std::unordered_map<std::pair<unsigned int, unsigned int>, unsigned int, pair_hash> pair_idx;
// maybe we don't need to reserve?
xi.reserve(m_xi.size());
psi.reserve(m_psi.size());
pair_idx.reserve(m_pair_idx.bucket_count());
xi.swap(m_xi);
psi.swap(m_psi);
pair_idx.swap(m_pair_idx);
clearDynamicState();
unsigned int idx = 0;
for (int i = 0; i < (int)m_pdata->getN(); i++)
{
auto tag_i = h_tag.data[i];
const size_t myHead = h_head_list.data[i];
const unsigned int size = (unsigned int)h_n_neigh.data[i];
for (unsigned int k = 0; k < size; k++)
{
// access the index of this neighbor (MEM TRANSFER: 1
// scalar)
unsigned int j = h_nlist.data[myHead + k];
auto tag_j = h_tag.data[j];
auto pair = std::make_pair(tag_i, tag_j);
if (pair_idx.empty() || pair_idx.find(pair) == pair_idx.end())
{
m_xi.push_back(make_scalar3(0, 0, 0));
m_psi.push_back(make_scalar3(0, 0, 0));
}
else
{
unsigned int mapped_idx = pair_idx[pair];
m_xi.push_back(xi[mapped_idx]);
m_psi.push_back(psi[mapped_idx]);
}
m_pair_idx[pair] = idx;
idx++;
}
}
}
auto xi_it = m_xi.begin();
auto psi_it = m_psi.begin();
m_w_cache.clear(); // clear angular velocity cache for the upcoming run
// for each particle
for (int i = 0; i < (int)m_pdata->getN(); i++)
{
// access the particle's position and type (MEM TRANSFER: 4
// scalars)
Scalar3 pi = make_scalar3(h_pos.data[i].x, h_pos.data[i].y, h_pos.data[i].z);
unsigned int typei = __scalar_as_int(h_pos.data[i].w);
vec3<Scalar> v_i(h_vel.data[i].x, h_vel.data[i].y, h_vel.data[i].z);
vec3<Scalar> w_i(0.0, 0.0, 0.0);
auto tag_i = h_tag.data[i];
auto search = m_w_cache.find(tag_i);
if (search == m_w_cache.end())
{
quat<Scalar> q_i(h_orientation.data[i]);
quat<Scalar> p_i(h_angmom.data[i]);
Scalar3 I_i(h_inertia.data[i]);
vec3<Scalar> s_i((conj(q_i) * p_i).v / Scalar(2.0));
// I might be able to get rid of the ? operator
// this would assume that any componenet of s_i is always
// zero if I_i is zero
w_i = vec3<Scalar>(I_i.x == 0.0 ? 0.0 : s_i.x / I_i.x,
I_i.y == 0.0 ? 0.0 : s_i.y / I_i.y,
I_i.z == 0.0 ? 0.0 : s_i.z / I_i.z);
w_i = rotate(q_i, w_i); // now rotate into real frame
m_w_cache[tag_i] = make_scalar3(w_i.x, w_i.y, w_i.z);
}
else
{
Scalar3 _w = search->second;
w_i = vec3<Scalar>(_w.x, _w.y, _w.z);
}
// sanity check
assert(typei < m_pdata->getNTypes());
// access diameter and charge (if needed)
Scalar di = Scalar(0.0);
Scalar qi = Scalar(0.0);
// if (evaluator::needsDiameter())
di = Scalar(0.5) * h_diameter.data[i];
if (evaluator::needsCharge())
qi = h_charge.data[i];
// initialize current particle force, potential energy, and
// virial to 0
Scalar3 fi = make_scalar3(0, 0, 0);
Scalar3 ti = make_scalar3(0, 0, 0);
Scalar pei = 0.0;
Scalar virialxxi = 0.0;
Scalar virialxyi = 0.0;
Scalar virialxzi = 0.0;
Scalar virialyyi = 0.0;
Scalar virialyzi = 0.0;
Scalar virialzzi = 0.0;
// loop over all of the neighbors of this particle
const size_t myHead = h_head_list.data[i];
const unsigned int size = (unsigned int)h_n_neigh.data[i];
for (unsigned int k = 0; k < size; k++)
{
// access the index of this neighbor (MEM TRANSFER: 1
// scalar)
unsigned int j = h_nlist.data[myHead + k];
assert(j < m_pdata->getN() + m_pdata->getNGhosts());
// calculate dr_ji (MEM TRANSFER: 3 scalars / FLOPS: 3)
Scalar3 pj = make_scalar3(h_pos.data[j].x, h_pos.data[j].y, h_pos.data[j].z);
Scalar3 dx = pi - pj;
// access the type of the neighbor particle (MEM TRANSFER: 1
// scalar)
unsigned int typej = __scalar_as_int(h_pos.data[j].w);
assert(typej < m_pdata->getNTypes());
// access diameter and charge (if needed)
Scalar dj = Scalar(0.0);
Scalar qj = Scalar(0.0);
// if (evaluator::needsDiameter())
dj = Scalar(0.5) * h_diameter.data[j];
if (evaluator::needsCharge())
qj = h_charge.data[j];
// apply periodic boundary conditions
dx = box.minImage(dx);
// calculate r_ij squared (FLOPS: 5)
Scalar rsq = dot(dx, dx);
// get parameters for this type pair
unsigned int typpair_idx = m_typpair_idx(typei, typej);
param_type param = m_params[typpair_idx];
Scalar rcutsq = h_rcutsq.data[typpair_idx];
// compute the force and potential energy
Scalar force_divr = Scalar(0.0);
Scalar pair_eng = Scalar(0.0);
Scalar r = Scalar(0.0);
Scalar rinv = Scalar(0.0);
evaluator eval(rsq, rcutsq, param);
if (evaluator::needsDiameter())
eval.setDiameter(di, dj);
if (evaluator::needsCharge())
eval.setCharge(qi, qj);
//! This is the normal force of the conservative pair
//! interaction. We'll also need to calculate the
//! non-conservative friction forces if the conservative
//! interaction is non-zero (in contact).
bool evaluated = eval.evalForceAndEnergyHPF(force_divr, pair_eng, r, rinv);
if (evaluated)
{
// grab data from particle j and calculate it if it
// isn't cached
vec3<Scalar> v_j(h_vel.data[j].x, h_vel.data[j].y, h_vel.data[j].z);
vec3<Scalar> w_j(0.0, 0.0, 0.0);
auto tag_j = h_tag.data[j];
//!> NOTE - eventually we probably want to avoid some of
//! these computations if mus or mur are zero
auto search = m_w_cache.find(tag_j);
if (search == m_w_cache.end())
{
quat<Scalar> q_j(h_orientation.data[j]);
quat<Scalar> p_j(h_angmom.data[j]);
Scalar3 I_j(h_inertia.data[j]);
vec3<Scalar> s_j((conj(q_j) * p_j).v / Scalar(2.0));
// I might be able to get rid of the ? operator
// this would assume that any componenet of s_i is
// always zero if I_i is zero
w_j = vec3<Scalar>(I_j.x == 0.0 ? 0.0 : s_j.x / I_j.x,
I_j.y == 0.0 ? 0.0 : s_j.y / I_j.y,
I_j.z == 0.0 ? 0.0 : s_j.z / I_j.z);
w_j = rotate(q_j, w_i); // now rotate into real frame
m_w_cache[tag_j] = make_scalar3(w_j.x, w_j.y, w_j.z);
}
else
{
Scalar3 _w = search->second;
w_j = vec3<Scalar>(_w.x, _w.y, _w.z);
}
// add up conservate and non-conservative forces and
// compute torque
vec3<Scalar> force = force_divr * vec3<Scalar>(dx.x, dx.y, dx.z);
vec3<Scalar> force_slide(0.0, 0.0, 0.0);
vec3<Scalar> force_roll(0.0, 0.0, 0.0);
vec3<Scalar> torque_i(0.0, 0.0, 0.0);
vec3<Scalar> torque_j(0.0, 0.0, 0.0);
vec3<Scalar> xi_ij(*xi_it);
vec3<Scalar> psi_ij(*psi_it);
// non-conservative force and torque
Scalar force_sqr = force_divr * force_divr * rsq;
vec3<Scalar> v_unit_dx(dx.x * rinv, dx.y * rinv, dx.z * rinv);
Scalar a_ij = Scalar(2.0) * di * dj / (di + dj);
force_slide = m_ks * xi_ij;
force_roll = m_kr * psi_ij;
Scalar slide_sqr = dot(force_slide, force_slide);
Scalar roll_sqr = dot(force_roll, force_roll);
if (slide_sqr > m_mus * m_mus * force_sqr)
force_slide *= m_mus * fast::rsqrt(slide_sqr) * force_divr * r;
if (roll_sqr > m_mur * m_mur * force_sqr)
force_roll *= m_mur * fast::rsqrt(roll_sqr) * force_divr * r;
force += force_slide;
auto torque_slide = cross(v_unit_dx, force_slide);
auto torque_roll = cross(v_unit_dx, force_roll);
torque_i = di * torque_slide + a_ij * torque_roll;
ti.x += torque_i.x;
ti.y += torque_i.y;
ti.z += torque_i.z;
vec3<Scalar> ur_pre = (v_j - v_i) - cross(di * w_i + dj * w_j, v_unit_dx);
auto tmp = ur_pre - v_unit_dx * dot(ur_pre, v_unit_dx) * m_deltaT;
*xi_it += make_scalar3(tmp.x, tmp.y, tmp.z);
tmp = a_ij * cross(w_i - w_j, v_unit_dx) * m_deltaT;
*psi_it += make_scalar3(tmp.x, tmp.y, tmp.z);
// TODO need to verify that this is the correct, but
// since we assume the bodies are spherical, their
// torques should just as simple as negating the force
// and multiplying by the other radius
if (third_law)
torque_j = -dj * torque_slide - a_ij * torque_roll;
Scalar3 force2 = make_scalar3(force.x, force.y, force.z) * Scalar(0.5);
// add the force, potential energy and virial to the
// particle i (FLOPS: 8)
fi += make_scalar3(force.x, force.y, force.z);
pei += pair_eng * Scalar(0.5);
if (compute_virial)
{
virialxxi += dx.x * force2.x;
virialxyi += dx.y * force2.x;
virialxzi += dx.z * force2.x;
virialyyi += dx.y * force2.y;
virialyzi += dx.z * force2.y;
virialzzi += dx.z * force2.z;
}
// add the force to particle j if we are using the third
// law (MEM TRANSFER: 10 scalars / FLOPS: 8) only add
// force to local particles
if (third_law && j < m_pdata->getN())
{
unsigned int mem_idx = j;
h_force.data[mem_idx].x -= force.x;
h_force.data[mem_idx].y -= force.y;
h_force.data[mem_idx].z -= force.z;
// might be a bug, buth this should probably be +,
// not - signs
h_torque.data[j].x += torque_j.x;
h_torque.data[j].y += torque_j.y;
h_torque.data[j].z += torque_j.z;
h_force.data[mem_idx].w += pair_eng * Scalar(0.5);
if (compute_virial)
{
h_virial.data[0 * m_virial_pitch + j] += dx.x * force2.x;
h_virial.data[1 * m_virial_pitch + j] += dx.y * force2.x;
h_virial.data[2 * m_virial_pitch + j] += dx.z * force2.x;
h_virial.data[3 * m_virial_pitch + j] += dx.y * force2.y;
h_virial.data[4 * m_virial_pitch + j] += dx.z * force2.y;
h_virial.data[5 * m_virial_pitch + j] += dx.z * force2.z;
}
}
}
else
{
*xi_it = make_scalar3(0.0, 0.0, 0.0);
*psi_it = make_scalar3(0.0, 0.0, 0.0);
}
++xi_it;
++psi_it;
}
if (m_gamma != 0.0)
{
fi.x -= (v_i.x - m_hi_shear_rate.x * pi.y - m_hi_shear_rate.y * pi.z) * m_gamma * m_deltaT;
fi.y -= (v_i.y - m_hi_shear_rate.z * pi.z) * m_gamma * m_deltaT;
fi.z -= v_i.z * m_gamma * m_deltaT;
ti.x -= w_i.x * m_gamma * m_deltaT;
ti.y -= w_i.y * m_gamma * m_deltaT;
ti.z -= w_i.z * m_gamma * m_deltaT;
}
// finally, increment the force, potential energy and virial for
// particle i
unsigned int mem_idx = i;
h_force.data[mem_idx].x += fi.x;
h_force.data[mem_idx].y += fi.y;
h_force.data[mem_idx].z += fi.z;
h_torque.data[mem_idx].x += ti.x;
h_torque.data[mem_idx].y += ti.y;
h_torque.data[mem_idx].z += ti.z;
h_force.data[mem_idx].w += pei;
if (compute_virial)
{
h_virial.data[0 * m_virial_pitch + mem_idx] += virialxxi;
h_virial.data[1 * m_virial_pitch + mem_idx] += virialxyi;
h_virial.data[2 * m_virial_pitch + mem_idx] += virialxzi;
h_virial.data[3 * m_virial_pitch + mem_idx] += virialyyi;
h_virial.data[4 * m_virial_pitch + mem_idx] += virialyzi;
h_virial.data[5 * m_virial_pitch + mem_idx] += virialzzi;
}
}
}
#ifdef ENABLE_MPI
/*! \param timestep Current time step
*/
template<class evaluator>
CommFlags HPFPotentialPair<evaluator>::getRequestedCommFlags(uint64_t timestep)
{
CommFlags flags = CommFlags(0);
if (evaluator::needsCharge())
flags[comm_flag::charge] = 1;
if (evaluator::needsDiameter())
flags[comm_flag::diameter] = 1;
flags |= ForceCompute::getRequestedCommFlags(timestep);
return flags;
}
#endif
namespace detail
{
//! Export this pair potential to python
/*! \param name Name of the class in the exported python module
\tparam T Evaluator type to export.
*/
template<class T> void export_HPFPotentialPair(pybind11::module& m, const std::string& name)
{
pybind11::class_<HPFPotentialPair<T>, ForceCompute, std::shared_ptr<HPFPotentialPair<T>>>
potentialpair(m, name.c_str());
potentialpair
.def(pybind11::init<std::shared_ptr<SystemDefinition>,
std::shared_ptr<NeighborList>,
Scalar,
Scalar,
Scalar,
Scalar>())
.def("setParams", &HPFPotentialPair<T>::setParamsPython)
.def("getParams", &HPFPotentialPair<T>::getParams)
.def("setRCut", &HPFPotentialPair<T>::setRCutPython)
.def("getRCut", &HPFPotentialPair<T>::getRCut)
// .def("_evaluate", &HPFPotentialPair<T>::evaluate)
.def("slotWriteGSDShapeSpec", &HPFPotentialPair<T>::slotWriteGSDShapeSpec)
.def("connectGSDShapeSpec", &HPFPotentialPair<T>::connectGSDShapeSpec)
.def_readwrite("log_pair_info", &HPFPotentialPair<T>::m_log_pair_info)
.def_readwrite("gamma", &HPFPotentialPair<T>::m_gamma)
.def_property("hi_shear_rate", &HPFPotentialPair<T>::getHIShearRate, &HPFPotentialPair<T>::setHIShearRate);
}
} // end namespace detail
} // end namespace md
} // end namespace hoomd
#endif // __HPF_POTENTIAL_PAIR_H__