/* adept_source.h - Source code for the Adept library
Copyright (C) 2012-2015 The University of Reading
Licensed under the Apache License, Version 2.0 (the "License"); you
may not use this file except in compliance with the License. You
may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied. See the License for the specific language governing
permissions and limitations under the License.
This file was created automatically by script ./create_adept_source_header
on Wed Jun 3 15:45:14 BST 2015
It contains a concatenation of the source files from the Adept
library. The idea is that a program may #include this file in one of
its source files (typically the one containing the main function),
and then the Adept library will be built into the executable without
the need to link to an external library. All other source files
should just #include "adept.h". The ability to use Adept in this
way makes it easier to distribute an Adept package that is usable on
non-Unix platforms that are unable to use the configure script to
build external libraries.
The individual source files now follow.
*/
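// Illustrative usage (a minimal sketch, not part of the library
// itself): the inclusion pattern described above together with the
// simplest reverse-mode calculation. adept::adouble, set_gradient()
// and get_gradient() are assumed from the public adept.h interface.
//
//   // main.cpp -- the ONE source file that includes adept_source.h
//   #include "adept_source.h"
//   #include "adept.h"
//
//   int main() {
//     adept::Stack stack;             // recording stack for this thread
//     adept::adouble x = 3.0;         // active (differentiable) variable
//     stack.new_recording();          // start recording derivative statements
//     adept::adouble y = x * x;       // y and its derivative info are recorded
//     y.set_gradient(1.0);            // seed the adjoint of the output
//     stack.compute_adjoint();        // reverse pass through the recording
//     double dydx = x.get_gradient(); // = 2*x = 6
//     return 0;
//   }
//
//   // every other source file includes only "adept.h"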
#ifndef ADEPT_SOURCE_H
#define ADEPT_SOURCE_H 1
// =================================================================
// Contents of adept.cpp
// =================================================================
/* adept.cpp -- Fast automatic differentiation with expression templates
Copyright (C) 2012-2015 The University of Reading
Author: Robin Hogan <r.j.hogan@reading.ac.uk>
This file is part of the Adept library.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <iostream>
#include <cstring> // For memcpy
#ifdef _OPENMP
#include <omp.h>
#endif
#include "adept.h"
#ifdef HAVE_CONFIG_H
// Obtain compiler (CXX) and compile flags (CXXFLAGS) from config.h
#include "config.h"
#endif
namespace adept {
// Global pointers to the current thread, the second of which is
// thread safe. The first is only used if ADEPT_STACK_THREAD_UNSAFE
// is defined.
ADEPT_THREAD_LOCAL Stack* _stack_current_thread = 0;
Stack* _stack_current_thread_unsafe = 0;
// Return the compiler used to compile the Adept library (e.g. "g++
// [4.3.2]" or "Microsoft Visual C++ [1800]")
std::string
compiler_version()
{
#ifdef CXX
std::string cv = CXX; // Defined in config.h
#elif defined(_MSC_VER)
std::string cv = "Microsoft Visual C++";
#else
std::string cv = "unknown";
#endif
#ifdef __GNUC__
#define STRINGIFY3(A,B,C) STRINGIFY(A) "." STRINGIFY(B) "." STRINGIFY(C)
#define STRINGIFY(A) #A
cv += " [" STRINGIFY3(__GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__) "]";
#undef STRINGIFY
#undef STRINGIFY3
#elif defined(_MSC_VER)
#define STRINGIFY1(A) STRINGIFY(A)
#define STRINGIFY(A) #A
cv += " [" STRINGIFY1(_MSC_VER) "]";
#undef STRINGIFY
#undef STRINGIFY1
#endif
return cv;
}
// Return the compiler flags used when compiling the Adept library
// (e.g. "-Wall -g -O3")
std::string
compiler_flags()
{
#ifdef CXXFLAGS
return CXXFLAGS; // Defined in config.h
#else
return "unknown";
#endif
}
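// For example, a program can report how this copy of Adept was built
// using only the two functions above (sketch, given <iostream>):
//
//   std::cout << "Adept compiled with " << adept::compiler_version()
//             << " using flags " << adept::compiler_flags() << "\n";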
// MEMBER FUNCTIONS OF THE STACK CLASS
// Destructor: frees dynamically allocated memory (if any)
Stack::~Stack() {
// If this is the currently active stack then set to NULL as
// "this" is shortly to become invalid
if (is_thread_unsafe_) {
if (_stack_current_thread_unsafe == this) {
_stack_current_thread_unsafe = 0;
}
}
else if (_stack_current_thread == this) {
_stack_current_thread = 0;
}
#ifndef ADEPT_STACK_STORAGE_STL
if (statement_) {
delete[] statement_;
}
if (gradient_) {
delete[] gradient_;
}
if (multiplier_) {
delete[] multiplier_;
}
if (offset_) {
delete[] offset_;
}
#endif
}
// Make this stack "active" by copying its "this" pointer to a
// global variable; this makes it the stack that aReal objects
// subsequently interact with when being created and participating
// in mathematical expressions
void
Stack::activate()
{
// Check that we don't already have an active stack in this thread
if ((is_thread_unsafe_ && _stack_current_thread_unsafe
&& _stack_current_thread_unsafe != this)
|| ((!is_thread_unsafe_) && _stack_current_thread
&& _stack_current_thread != this)) {
throw(stack_already_active());
}
else {
if (!is_thread_unsafe_) {
_stack_current_thread = this;
}
else {
_stack_current_thread_unsafe = this;
}
}
}
// This function is called by the constructor to initialize memory,
// which can be grown subsequently
void
Stack::initialize(Offset n)
{
#ifdef ADEPT_STACK_STORAGE_STL
statement_.reserve(n);
multiplier_.reserve(n);
offset_.reserve(n);
#else
multiplier_ = new Real[n];
offset_ = new Offset[n];
n_allocated_operations_ = n;
statement_ = new Statement[n];
n_allocated_statements_ = n;
#endif
new_recording();
// statement_[0].offset = -1;
// statement_[0].end_plus_one = 0;
}
#ifndef ADEPT_STACK_STORAGE_STL
// Double the size of the operation stack, or grow it even more if
// the requested minimum number of extra entries (min) is greater
// than this would allow
void
Stack::grow_operation_stack(Offset min)
{
Offset new_size = 2*n_allocated_operations_;
if (min > 0 && new_size < n_allocated_operations_+min) {
new_size += min;
}
Real* new_multiplier = new Real[new_size];
Offset* new_offset = new Offset[new_size];
std::memcpy(new_multiplier, multiplier_, n_operations_*sizeof(Real));
std::memcpy(new_offset, offset_, n_operations_*sizeof(Offset));
delete[] multiplier_;
delete[] offset_;
multiplier_ = new_multiplier;
offset_ = new_offset;
n_allocated_operations_ = new_size;
}
// ... likewise for the statement stack
void
Stack::grow_statement_stack(Offset min)
{
Offset new_size = 2*n_allocated_statements_;
if (min > 0 && new_size < n_allocated_statements_+min) {
new_size += min;
}
Statement* new_statement = new Statement[new_size];
std::memcpy(new_statement, statement_,
n_statements_*sizeof(Statement));
delete[] statement_;
statement_ = new_statement;
n_allocated_statements_ = new_size;
}
#endif
// Set the maximum number of threads to be used in Jacobian
// calculations, if possible. A value of 1 indicates that OpenMP
// will not be used, while a value of 0 indicates that the number
// will match the number of available processors. Returns the
// maximum that will be used, which will be 1 if the Adept library
// was compiled without OpenMP support. Note that a value of 1 will
// disable the use of OpenMP with Adept, so Adept will then use no
// OpenMP directives or function calls. Note that if in your program
// you use OpenMP with each thread performing automatic
differentiation with its own independent Adept stack, then
// typically only one OpenMP thread is available for each Jacobian
// calculation, regardless of whether you call this function.
int
Stack::set_max_jacobian_threads(int n)
{
#ifdef _OPENMP
if (have_openmp_) {
if (n == 1) {
openmp_manually_disabled_ = true;
return 1;
}
else if (n < 1) {
openmp_manually_disabled_ = false;
omp_set_num_threads(omp_get_num_procs());
return omp_get_max_threads();
}
else {
openmp_manually_disabled_ = false;
omp_set_num_threads(n);
return omp_get_max_threads();
}
}
#endif
return 1;
}
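// Usage sketch (given an adept::Stack named "stack"): the return value
// is the number of threads that will actually be used, which is always
// 1 if Adept was compiled without OpenMP support.
//
//   stack.set_max_jacobian_threads(4); // request up to 4 OpenMP threads
//   stack.set_max_jacobian_threads(0); // use one thread per processor
//   stack.set_max_jacobian_threads(1); // disable OpenMP within Adept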
// Return maximum number of OpenMP threads to be used in Jacobian
// calculation
int
Stack::max_jacobian_threads() const
{
#ifdef _OPENMP
if (have_openmp_) {
if (openmp_manually_disabled_) {
return 1;
}
else {
return omp_get_max_threads();
}
}
#endif
return 1;
}
// Perform the adjoint computation (reverse mode). It is assumed that
// some gradients have been assigned already, otherwise the function
// returns with an error.
void
Stack::compute_adjoint()
{
if (gradients_are_initialized()) {
// Loop backwards through the derivative statements
for (Offset ist = n_statements_-1; ist > 0; ist--) {
const Statement& statement = statement_[ist];
// We copy the RHS gradient (LHS in the original derivative
// statement but swapped in the adjoint equivalent) to "a" in
// case it appears on the LHS in any of the following statements
Real a = gradient_[statement.offset];
gradient_[statement.offset] = 0.0;
// By only looping if a is non-zero we gain a significant speed-up
if (a != 0.0) {
// Loop over operations
for (Offset i = statement_[ist-1].end_plus_one;
i < statement.end_plus_one; i++) {
gradient_[offset_[i]] += multiplier_[i]*a;
}
}
}
}
else {
throw(gradients_not_initialized());
}
}
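// Typical reverse-mode sequence (a sketch assuming the adept.h
// interface, with set_gradient/get_gradient on active variables):
//
//   stack.new_recording();        // record the calculation y = f(x1, x2)
//   adept::adouble y = x1*x2 + x1;
//   y.set_gradient(1.0);          // assign the adjoint of the output
//   stack.compute_adjoint();      // reverse pass
//   double dy_dx1 = x1.get_gradient();
//   double dy_dx2 = x2.get_gradient();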
// Perform tangent linear computation (forward mode). It is assumed
// that some gradients have been assigned already, otherwise the
// function returns with an error.
void
Stack::compute_tangent_linear()
{
if (gradients_are_initialized()) {
// Loop forward through the statements
for (Offset ist = 1; ist < n_statements_; ist++) {
const Statement& statement = statement_[ist];
// We copy the LHS to "a" in case it appears on the RHS in any
// of the following statements
Real a = 0.0;
for (Offset i = statement_[ist-1].end_plus_one;
i < statement.end_plus_one; i++) {
a += multiplier_[i]*gradient_[offset_[i]];
}
gradient_[statement.offset] = a;
}
}
else {
throw(gradients_not_initialized());
}
}
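// The tangent-linear (forward-mode) counterpart seeds an input instead
// of an output (again a sketch assuming set_gradient/get_gradient):
//
//   stack.new_recording();
//   adept::adouble y = x1*x2 + x1;
//   x1.set_gradient(1.0);             // perturb the chosen input
//   stack.compute_tangent_linear();   // forward pass
//   double dy_dx1 = y.get_gradient(); // directional derivative dy/dx1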
// Compute the Jacobian matrix; note that jacobian_out must be
// allocated to be of size m*n, where m is the number of dependent
// variables and n is the number of independents. The independents
// and dependents must have already been identified with the
// functions "independent" and "dependent", otherwise this function
// will fail with FAILURE_XXDEPENDENT_NOT_IDENTIFIED. In the
// resulting matrix, the "m" dimension of the matrix varies
// fastest. This is implemented using a forward pass, appropriate
// for m>=n.
void
Stack::jacobian_forward(Real* jacobian_out)
{
if (independent_offset_.empty() || dependent_offset_.empty()) {
throw(dependents_or_independents_not_identified());
}
#ifdef _OPENMP
if (have_openmp_
&& !openmp_manually_disabled_
&& n_independent() > ADEPT_MULTIPASS_SIZE
&& omp_get_max_threads() > 1) {
// Call the parallel version
jacobian_forward_openmp(jacobian_out);
return;
}
#endif
gradient_multipass_.resize(max_gradient_);
// For optimization reasons, we process a block of
// ADEPT_MULTIPASS_SIZE columns of the Jacobian at once; calculate
// how many blocks are needed and how many extras will remain
Offset n_block = n_independent() / ADEPT_MULTIPASS_SIZE;
Offset n_extra = n_independent() % ADEPT_MULTIPASS_SIZE;
Offset i_independent = 0; // Index of first column in the block we
// are currently computing
// Loop over blocks of ADEPT_MULTIPASS_SIZE columns
for (Offset iblock = 0; iblock < n_block; iblock++) {
// Set the initial gradients all to zero
zero_gradient_multipass();
// Each seed vector has one non-zero entry of 1.0
for (Offset i = 0; i < ADEPT_MULTIPASS_SIZE; i++) {
gradient_multipass_[independent_offset_[i_independent+i]][i] = 1.0;
}
// Loop forward through the derivative statements
for (Offset ist = 1; ist < n_statements_; ist++) {
const Statement& statement = statement_[ist];
// We copy the LHS to "a" in case it appears on the RHS in any
// of the following statements
Block<ADEPT_MULTIPASS_SIZE,Real> a; // Initialized to zero
// automatically
// Loop through operations
for (Offset iop = statement_[ist-1].end_plus_one;
iop < statement.end_plus_one; iop++) {
if (multiplier_[iop] == 1.0) {
// Loop through columns within this block; we hope the
// compiler can optimize this loop
for (Offset i = 0; i < ADEPT_MULTIPASS_SIZE; i++) {
a[i] += gradient_multipass_[offset_[iop]][i];
}
}
else {
for (Offset i = 0; i < ADEPT_MULTIPASS_SIZE; i++) {
a[i] += multiplier_[iop]*gradient_multipass_[offset_[iop]][i];
}
}
}
// Copy the results
for (Offset i = 0; i < ADEPT_MULTIPASS_SIZE; i++) {
gradient_multipass_[statement.offset][i] = a[i];
}
} // End of loop over statements
// Copy the gradients corresponding to the dependent variables
// into the Jacobian matrix
for (Offset idep = 0; idep < n_dependent(); idep++) {
for (Offset i = 0; i < ADEPT_MULTIPASS_SIZE; i++) {
jacobian_out[(i_independent+i)*n_dependent()+idep]
= gradient_multipass_[dependent_offset_[idep]][i];
}
}
i_independent += ADEPT_MULTIPASS_SIZE;
} // End of loop over blocks
// Now do the same but for the remaining few columns in the matrix
if (n_extra > 0) {
zero_gradient_multipass();
for (Offset i = 0; i < n_extra; i++) {
gradient_multipass_[independent_offset_[i_independent+i]][i] = 1.0;
}
for (Offset ist = 1; ist < n_statements_; ist++) {
const Statement& statement = statement_[ist];
Block<ADEPT_MULTIPASS_SIZE,Real> a;
for (Offset iop = statement_[ist-1].end_plus_one;
iop < statement.end_plus_one; iop++) {
if (multiplier_[iop] == 1.0) {
for (Offset i = 0; i < n_extra; i++) {
a[i] += gradient_multipass_[offset_[iop]][i];
}
}
else {
for (Offset i = 0; i < n_extra; i++) {
a[i] += multiplier_[iop]*gradient_multipass_[offset_[iop]][i];
}
}
}
for (Offset i = 0; i < n_extra; i++) {
gradient_multipass_[statement.offset][i] = a[i];
}
}
for (Offset idep = 0; idep < n_dependent(); idep++) {
for (Offset i = 0; i < n_extra; i++) {
jacobian_out[(i_independent+i)*n_dependent()+idep]
= gradient_multipass_[dependent_offset_[idep]][i];
}
}
}
}
// Compute the Jacobian matrix; note that jacobian_out must be
// allocated to be of size m*n, where m is the number of dependent
// variables and n is the number of independents. The independents
// and dependents must have already been identified with the
// functions "independent" and "dependent", otherwise this function
// will fail with FAILURE_XXDEPENDENT_NOT_IDENTIFIED. In the
// resulting matrix, the "m" dimension of the matrix varies
// fastest. This is implemented using a reverse pass, appropriate
// for m<n.
void
Stack::jacobian_reverse(Real* jacobian_out)
{
if (independent_offset_.empty() || dependent_offset_.empty()) {
throw(dependents_or_independents_not_identified());
}
#ifdef _OPENMP
if (have_openmp_
&& !openmp_manually_disabled_
&& n_dependent() > ADEPT_MULTIPASS_SIZE
&& omp_get_max_threads() > 1) {
// Call the parallel version
jacobian_reverse_openmp(jacobian_out);
return;
}
#endif
gradient_multipass_.resize(max_gradient_);
// For optimization reasons, we process a block of
// ADEPT_MULTIPASS_SIZE rows of the Jacobian at once; calculate
// how many blocks are needed and how many extras will remain
Offset n_block = n_dependent() / ADEPT_MULTIPASS_SIZE;
Offset n_extra = n_dependent() % ADEPT_MULTIPASS_SIZE;
Offset i_dependent = 0; // Index of first row in the block we are
// currently computing
// Loop over the blocks of ADEPT_MULTIPASS_SIZE rows
for (Offset iblock = 0; iblock < n_block; iblock++) {
// Set the initial gradients all to zero
zero_gradient_multipass();
// Each seed vector has one non-zero entry of 1.0
for (Offset i = 0; i < ADEPT_MULTIPASS_SIZE; i++) {
gradient_multipass_[dependent_offset_[i_dependent+i]][i] = 1.0;
}
// Loop backward through the derivative statements
for (Offset ist = n_statements_-1; ist > 0; ist--) {
const Statement& statement = statement_[ist];
// We copy the RHS to "a" in case it appears on the LHS in any
// of the following statements
Real a[ADEPT_MULTIPASS_SIZE];
#if ADEPT_MULTIPASS_SIZE > ADEPT_MULTIPASS_SIZE_ZERO_CHECK
// For large blocks, we only process the ones where a[i] is
// non-zero
Offset i_non_zero[ADEPT_MULTIPASS_SIZE];
#endif
Offset n_non_zero = 0;
for (Offset i = 0; i < ADEPT_MULTIPASS_SIZE; i++) {
a[i] = gradient_multipass_[statement.offset][i];
gradient_multipass_[statement.offset][i] = 0.0;
if (a[i] != 0.0) {
#if ADEPT_MULTIPASS_SIZE > ADEPT_MULTIPASS_SIZE_ZERO_CHECK
i_non_zero[n_non_zero++] = i;
#else
n_non_zero = 1;
#endif
}
}
// Only do anything for this statement if any of the a values
// are non-zero
if (n_non_zero) {
// Loop through the operations
for (Offset iop = statement_[ist-1].end_plus_one;
iop < statement.end_plus_one; iop++) {
// Try to minimize pointer dereferencing by making local
// copies
register Real multiplier = multiplier_[iop];
register Real* __restrict gradient_multipass
= &(gradient_multipass_[offset_[iop]][0]);
#if ADEPT_MULTIPASS_SIZE > ADEPT_MULTIPASS_SIZE_ZERO_CHECK
// For large blocks, loop over only the indices
// corresponding to non-zero a
for (Offset i = 0; i < n_non_zero; i++) {
gradient_multipass[i_non_zero[i]] += multiplier*a[i_non_zero[i]];
}
#else
// For small blocks, do all indices
for (Offset i = 0; i < ADEPT_MULTIPASS_SIZE; i++) {
gradient_multipass[i] += multiplier*a[i];
}
#endif
}
}
} // End of loop over statements
// Copy the gradients corresponding to the independent variables
// into the Jacobian matrix
for (Offset iindep = 0; iindep < n_independent(); iindep++) {
for (Offset i = 0; i < ADEPT_MULTIPASS_SIZE; i++) {
jacobian_out[iindep*n_dependent()+i_dependent+i]
= gradient_multipass_[independent_offset_[iindep]][i];
}
}
i_dependent += ADEPT_MULTIPASS_SIZE;
} // End of loop over blocks
// Now do the same but for the remaining few rows in the matrix
if (n_extra > 0) {
zero_gradient_multipass();
for (Offset i = 0; i < n_extra; i++) {
gradient_multipass_[dependent_offset_[i_dependent+i]][i] = 1.0;
}
for (Offset ist = n_statements_-1; ist > 0; ist--) {
const Statement& statement = statement_[ist];
Real a[ADEPT_MULTIPASS_SIZE];
#if ADEPT_MULTIPASS_SIZE > ADEPT_MULTIPASS_SIZE_ZERO_CHECK
Offset i_non_zero[ADEPT_MULTIPASS_SIZE];
#endif
Offset n_non_zero = 0;
for (Offset i = 0; i < n_extra; i++) {
a[i] = gradient_multipass_[statement.offset][i];
gradient_multipass_[statement.offset][i] = 0.0;
if (a[i] != 0.0) {
#if ADEPT_MULTIPASS_SIZE > ADEPT_MULTIPASS_SIZE_ZERO_CHECK
i_non_zero[n_non_zero++] = i;
#else
n_non_zero = 1;
#endif
}
}
if (n_non_zero) {
for (Offset iop = statement_[ist-1].end_plus_one;
iop < statement.end_plus_one; iop++) {
register Real multiplier = multiplier_[iop];
register Real* __restrict gradient_multipass
= &(gradient_multipass_[offset_[iop]][0]);
#if ADEPT_MULTIPASS_SIZE > ADEPT_MULTIPASS_SIZE_ZERO_CHECK
for (Offset i = 0; i < n_non_zero; i++) {
gradient_multipass[i_non_zero[i]] += multiplier*a[i_non_zero[i]];
}
#else
for (Offset i = 0; i < n_extra; i++) {
gradient_multipass[i] += multiplier*a[i];
}
#endif
}
}
}
for (Offset iindep = 0; iindep < n_independent(); iindep++) {
for (Offset i = 0; i < n_extra; i++) {
jacobian_out[iindep*n_dependent()+i_dependent+i]
= gradient_multipass_[independent_offset_[iindep]][i];
}
}
}
}
// If an aReal object is deleted, its gradient_offset is
// unregistered from the stack. If this is at the top of the stack
// then this is easy and is done inline; this is the usual case
// since C++ tries to deallocate automatic objects in the reverse
// order to that in which they were allocated. If it is not at the
// top of the stack then a non-inline function is called to ensure
// that the gap list is adjusted correctly.
void
Stack::unregister_gradient_not_top(const Offset& gradient_offset)
{
enum {
ADDED_AT_BASE,
ADDED_AT_TOP,
NEW_GAP,
NOT_FOUND
} status = NOT_FOUND;
// First try to find if the unregistered element is at the
// start or end of an existing gap
if (!gap_list_.empty() && most_recent_gap_ != gap_list_.end()) {
// We have a "most recent" gap - check whether the gradient
// to be unregistered is here
Gap& current_gap = *most_recent_gap_;
if (gradient_offset == current_gap.start - 1) {
current_gap.start--;
status = ADDED_AT_BASE;
}
else if (gradient_offset == current_gap.end + 1) {
current_gap.end++;
status = ADDED_AT_TOP;
}
// Should we check for erroneous removal from middle of gap?
}
if (status == NOT_FOUND) {
// Search other gaps
for (GapListIterator it = gap_list_.begin();
it != gap_list_.end(); it++) {
if (gradient_offset <= it->end + 1) {
// Gradient to unregister is either within the gap
// referenced by iterator "it", or it is between "it"
// and the previous gap in the list
if (gradient_offset == it->start - 1) {
status = ADDED_AT_BASE;
it->start--;
most_recent_gap_ = it;
}
else if (gradient_offset == it->end + 1) {
status = ADDED_AT_TOP;
it->end++;
most_recent_gap_ = it;
}
else {
// Insert a new gap of width 1; note that list::insert
// inserts *before* the specified location
most_recent_gap_
= gap_list_.insert(it, Gap(gradient_offset));
status = NEW_GAP;
}
break;
}
}
if (status == NOT_FOUND) {
gap_list_.push_back(Gap(gradient_offset));
most_recent_gap_ = gap_list_.end();
most_recent_gap_--;
}
}
// Finally check if gaps have merged
if (status == ADDED_AT_BASE
&& most_recent_gap_ != gap_list_.begin()) {
// Check whether the gap has merged with the next one
GapListIterator it = most_recent_gap_;
it--;
if (it->end == most_recent_gap_->start - 1) {
// Merge two gaps
most_recent_gap_->start = it->start;
gap_list_.erase(it);
}
}
else if (status == ADDED_AT_TOP) {
GapListIterator it = most_recent_gap_;
it++;
if (it != gap_list_.end()
&& it->start == most_recent_gap_->end + 1) {
// Merge two gaps
most_recent_gap_->end = it->end;
gap_list_.erase(it);
}
}
}
// Compute the Jacobian matrix; note that jacobian_out must be
// allocated to be of size m*n, where m is the number of dependent
// variables and n is the number of independents. In the resulting
// matrix, the "m" dimension of the matrix varies fastest. This is
// implemented by calling one of jacobian_forward and
// jacobian_reverse, whichever would be faster.
void
Stack::jacobian(Real* jacobian_out)
{
if (n_independent() <= n_dependent()) {
jacobian_forward(jacobian_out);
}
else {
jacobian_reverse(jacobian_out);
}
}
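// Jacobian usage sketch: with m dependents and n independents declared
// via the "dependent" and "independent" functions mentioned above, the
// caller allocates an m*n array in which the dependent ("m") index
// varies fastest (the array overloads of independent/dependent are
// assumed from adept.h):
//
//   stack.independent(x, n);          // x: array of n adouble inputs
//   // ... evaluate y = f(x) using adouble arithmetic ...
//   stack.dependent(y, m);            // y: array of m adouble outputs
//   std::vector<adept::Real> jac(m*n);
//   stack.jacobian(&jac[0]);
//   // d(y[i])/d(x[j]) is then jac[j*m + i]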
// Print each derivative statement to the specified stream (standard
// output if omitted)
void
Stack::print_statements(std::ostream& os) const
{
for (Offset ist = 1; ist < n_statements_; ist++) {
const Statement& statement = statement_[ist];
os << ist
<< ": d[" << statement.offset
<< "] = ";
if (statement_[ist-1].end_plus_one == statement_[ist].end_plus_one) {
os << "0\n";
}
else {
for (Offset i = statement_[ist-1].end_plus_one;
i < statement.end_plus_one; i++) {
os << " + " << multiplier_[i] << "*d[" << offset_[i] << "]";
}
os << "\n";
}
}
}
// Print the current gradient list to the specified stream (standard
// output if omitted)
bool
Stack::print_gradients(std::ostream& os) const
{
if (gradients_are_initialized()) {
for (Offset i = 0; i < max_gradient_; i++) {
if (i%10 == 0) {
if (i != 0) {
os << "\n";
}
os << i << ":";
}
os << " " << gradient_[i];
}
os << "\n";
return true;
}
else {
os << "No gradients initialized\n";
return false;
}
}
// Print the list of gaps in the gradient list to the specified
// stream (standard output if omitted)
void
Stack::print_gaps(std::ostream& os) const
{
for (std::list<Gap>::const_iterator it = gap_list_.begin();
it != gap_list_.end(); it++) {
os << it->start << "-" << it->end << " ";
}
}
#ifndef ADEPT_STACK_STORAGE_STL
// Initialize the vector of gradients ready for the adjoint
// calculation
void
Stack::initialize_gradients()
{
if (max_gradient_ > 0) {
if (n_allocated_gradients_ < max_gradient_) {
if (gradient_) {
delete[] gradient_;
}
gradient_ = new Real[max_gradient_];
n_allocated_gradients_ = max_gradient_;
}
for (Offset i = 0; i < max_gradient_; i++) {
gradient_[i] = 0.0;
}
}
gradients_initialized_ = true;
}
#else
void
Stack::initialize_gradients()
{
gradient_.resize(max_gradient_+10, 0.0);
gradients_initialized_ = true;
}
#endif
// Report information about the stack to the specified stream, or
// standard output if omitted; note that this is synonymous with
// sending the Stack object to a stream using the "<<" operator.
void
Stack::print_status(std::ostream& os) const
{
os << "Automatic Differentiation Stack (address " << this << "):\n";
if ((!is_thread_unsafe_) && _stack_current_thread == this) {
os << " Currently attached - thread safe\n";
}
else if (is_thread_unsafe_ && _stack_current_thread_unsafe == this) {
os << " Currently attached - thread unsafe\n";
}
else {
os << " Currently detached\n";
}
os << " Recording status:\n";
// Account for the null statement at the start by subtracting one
os << " " << n_statements()-1 << " statements ("
<< n_allocated_statements() << " allocated)";
os << " and " << n_operations() << " operations ("
<< n_allocated_operations() << " allocated)\n";
os << " " << n_gradients_registered() << " gradients currently registered ";
os << "and a total of " << max_gradients() << " needed (current index "
<< i_gradient() << ")\n";
if (gap_list_.empty()) {
os << " Gradient list has no gaps\n";
}
else {
os << " Gradient list has " << gap_list_.size() << " gaps (";
print_gaps(os);
os << ")\n";
}
os << " Computation status:\n";
if (gradients_are_initialized()) {
os << " " << max_gradients() << " gradients assigned ("
<< n_allocated_gradients() << " allocated)\n";
}
else {
os << " 0 gradients assigned (" << n_allocated_gradients()
<< " allocated)\n";
}
os << " Jacobian size: " << n_dependents() << "x" << n_independents() << "\n";
#ifdef _OPENMP
if (have_openmp_) {
if (openmp_manually_disabled_) {
os << " Parallel Jacobian calculation manually disabled\n";
}
else {
os << " Parallel Jacobian calculation can use up to "
<< omp_get_max_threads() << " threads\n";
os << " Each thread treats " << ADEPT_MULTIPASS_SIZE
<< " (in)dependent variables\n";
}
}
else {
#endif
os << " Parallel Jacobian calculation not available\n";
#ifdef _OPENMP
}
#endif
}
} // End namespace adept
// =================================================================
// Contents of adept_openmp.cpp
// =================================================================
/* adept_openmp.cpp -- OpenMP-enabled Jacobian calculation for Adept library
Copyright (C) 2013-2015 The University of Reading
Author: Robin Hogan <r.j.hogan@reading.ac.uk>
This file is part of the Adept library.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
This file can and should be compiled even if OpenMP is not enabled.
*/
#ifdef _OPENMP
#include <omp.h>
#endif
#include "adept.h"
namespace adept {
// Compute the Jacobian matrix, parallelized using OpenMP. Normally
// the user would call the jacobian or jacobian_forward functions,
// and the OpenMP version would only be called if OpenMP is
// available and the Jacobian matrix is large enough for
// parallelization to be worthwhile. Note that jacobian_out must be
// allocated to be of size m*n, where m is the number of dependent
// variables and n is the number of independents. The independents
// and dependents must have already been identified with the
// functions "independent" and "dependent", otherwise this function
// will fail with FAILURE_XXDEPENDENT_NOT_IDENTIFIED. In the
// resulting matrix, the "m" dimension of the matrix varies
// fastest. This is implemented using a forward pass, appropriate
// for m>=n.
void
Stack::jacobian_forward_openmp(Real* jacobian_out)
{
if (independent_offset_.empty() || dependent_offset_.empty()) {
throw(dependents_or_independents_not_identified());
}
// Number of blocks to cycle through, including a possible last
// block containing fewer than ADEPT_MULTIPASS_SIZE variables
int n_block = (n_independent() + ADEPT_MULTIPASS_SIZE - 1)
/ ADEPT_MULTIPASS_SIZE;
Offset n_extra = n_independent() % ADEPT_MULTIPASS_SIZE;
int iblock;
#pragma omp parallel
{
std::vector<Block<ADEPT_MULTIPASS_SIZE,Real> >
gradient_multipass_b(max_gradient_);
#pragma omp for
for (iblock = 0; iblock < n_block; iblock++) {
// Set the offset to the independent variables for this block
Offset i_independent = ADEPT_MULTIPASS_SIZE * iblock;
Offset block_size = ADEPT_MULTIPASS_SIZE;
// If this is the last iteration and the number of extra
// elements is non-zero, then set the block size to the number
// of extra elements. If the number of extra elements is zero,
// then the number of independent variables is exactly divisible
// by ADEPT_MULTIPASS_SIZE, so the last iteration will be the
// same as all the rest.
if (iblock == n_block-1 && n_extra > 0) {
block_size = n_extra;
}
// Set the initial gradients all to zero
for (Offset i = 0; i < gradient_multipass_b.size(); i++) {
gradient_multipass_b[i].zero();
}
// Each seed vector has one non-zero entry of 1.0
for (Offset i = 0; i < block_size; i++) {
gradient_multipass_b[independent_offset_[i_independent+i]][i] = 1.0;
}
// Loop forward through the derivative statements
for (Offset ist = 1; ist < n_statements_; ist++) {