-
Notifications
You must be signed in to change notification settings - Fork 11
/
BulkLoadContext.cs
368 lines (304 loc) · 13.4 KB
/
BulkLoadContext.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
using System;
using System.Collections.Generic;
using System.Linq;
using log4net;
using Sitecore.Buckets.Util;
using Sitecore.ContentSearch;
using Sitecore.Data.Items;
using Sitecore.Diagnostics;
namespace Sitecore.DataBlaster.Load
{
/// <summary>
/// Should be created afresh for every bulk import action.
/// </summary>
public class BulkLoadContext
{
private ILog _log;

/// <summary>
/// Logger that receives the stage failure and skipped-item messages written by this context.
/// </summary>
/// <exception cref="ArgumentNullException">Thrown when the property is set to <c>null</c>.</exception>
public ILog Log
{
    get { return _log; }
    set
    {
        // A context without a logger is never valid, so reject null right at assignment.
        if (value == null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        _log = value;
    }
}
/// <summary>
/// Message of the most recently reported stage failure; assigned by <c>StageFailed</c>.
/// </summary>
public string FailureMessage { get; private set; }
/// <summary>
/// Name of the database this context was created for.
/// Required by the constructor and used to detect the default set of indexes to update.
/// </summary>
public string Database { get; private set; }
/// <summary>
/// Stages data to temp tables, but doesn't merge it with existing data.
/// Useful for debugging.
/// </summary>
public bool StageDataWithoutProcessing { get; set; }
/// <summary>
/// Whether to look up item ids in the database by item name or use the item id provided in the value of the field.
/// Switched on by <c>LookupItemsIn</c>.
/// </summary>
public bool LookupItemIds { get; set; }
/// <summary>
/// Performance optimization when loading items into a specific repository (supports bucketing).
/// Assigned by <c>LookupItemsIn</c>.
/// </summary>
public ItemReference Destination { get; private set; }
/// <summary>
/// Whether to look up blob ids in the database or use the blob id provided in the value of the field.
/// </summary>
public bool LookupBlobIds { get; set; }
/// <summary>
/// Whether bulk load allows template changes.
/// Typically used during serialization.
/// When set, provided bulk items should contain ALL fields and not partial data.
/// </summary>
public bool AllowTemplateChanges { get; set; }
/// <summary>
/// In the initial version of Sitecore 9.3, not all field records were put in the right fields table (versioned/unversioned/shared).
/// E.g. the Display Name is configured as an unversioned field on the template, but the actual data is stored in the versioned table.
/// We need this extra flag to be able to disable the cleanup operation during bulk load, else the Display Name values will be removed, resulting in empty context menus.
/// This flag is typically only relevant during deserialization actions. Enabled by default.
/// </summary>
public bool AllowCleanupOfFields { get; set; } = true;
/// <summary>
/// Forces updates in Sitecore database, so that all loaded items will have an item change.
/// All modification dates will be reset.
/// </summary>
public bool ForceUpdates { get; set; }
/// <summary>
/// Additional processing rules for fields which will affect all items with those specified fields.
/// This class assigns no default, so the value is <c>null</c> unless it is set.
/// </summary>
public IList<FieldRule> FieldRules { get; set; }
/// <summary>
/// Will ensure bucket folder structure for items that are directly added to a parent that is a bucket.
/// Be aware, this needs to do additional database reads while processing the item stream.
/// </summary>
public bool BucketIfNeeded { get; set; }
/// <summary>
/// Resolves the paths for items in buckets.
/// This class assigns no default, so the value is <c>null</c> unless it is set.
/// </summary>
public IDynamicBucketFolderPath BucketFolderPath { get; set; }
/// <summary>
/// Whether to remove updated items from Sitecore caches. Enabled by default (set in the constructor).
/// This setting is not impacted by the value of <seealso cref="ClearCaches"/>.
/// </summary>
public bool RemoveItemsFromCaches { get; set; }
/// <summary>
/// Offers an alternative strategy to remove items from Sitecore caches, by clearing them completely.
/// This setting is not impacted by the value of <seealso cref="RemoveItemsFromCaches"/>.
///
/// When both the imported data set and the Sitecore caches are quite large, there is a performance impact in scanning the caches for entries that must be deleted.
/// In this case it could prove more useful to just clear the caches, instead of spending time to scan them. The performance impact is then in repopulation, though.
/// For settings that have an impact on cache removal performance, see <see cref="Sitecore.Configuration.Settings.Caching.CacheKeyIndexingEnabled"/>.
/// </summary>
public bool ClearCaches { get; set; }
/// <summary>
/// Whether to update the history engine of Sitecore. This engine is e.g. used for index syncs.
/// </summary>
/// <remarks>
/// NOTE(review): nullable; how <c>null</c> is interpreted is decided outside this file (presumably "use the loader's default") - confirm.
/// </remarks>
public bool? UpdateHistory { get; set; }
/// <summary>
/// Whether to update the publish queue of Sitecore. This queue is used for incremental publishing.
/// </summary>
/// <remarks>
/// NOTE(review): nullable; how <c>null</c> is interpreted is decided outside this file (presumably "use the loader's default") - confirm.
/// </remarks>
public bool? UpdatePublishQueue { get; set; }
/// <summary>
/// Whether to update the link database.
/// </summary>
/// <remarks>
/// NOTE(review): nullable; how <c>null</c> is interpreted is decided outside this file (presumably "use the loader's default") - confirm.
/// </remarks>
public bool? UpdateLinkDatabase { get; set; }
/// <summary>
/// Whether to update the indexes of Sitecore. Enabled by default (set in the constructor).
/// </summary>
public bool UpdateIndexes { get; set; }
private IList<ISearchIndex> _allIndexes;
private IList<ISearchIndex> _indexesToUpdate;

/// <summary>
/// Indexes that should be updated after the load.
/// Returns the explicitly assigned list when there is one; otherwise falls back to every index
/// that has a <see cref="SitecoreItemCrawler"/> for <see cref="Database"/>. The detected list is
/// computed on first access and reused afterwards.
/// </summary>
public IList<ISearchIndex> IndexesToUpdate
{
    get
    {
        // An explicitly provided list always wins.
        if (_indexesToUpdate != null)
        {
            return _indexesToUpdate;
        }

        // No specific indexes provided: detect (once) all indexes crawling the current database.
        if (_allIndexes == null)
        {
            _allIndexes = ContentSearchManager.Indexes
                .Where(index => index.Crawlers
                    .OfType<SitecoreItemCrawler>()
                    .Any(crawler => Database.Equals(crawler.Database, StringComparison.OrdinalIgnoreCase)))
                .ToList();
        }

        return _allIndexes;
    }
    set { _indexesToUpdate = value; }
}
/// <summary>
/// Threshold percentage to refresh destination in index instead of updating it one by one.
/// </summary>
/// <remarks>
/// NOTE(review): nullable; what the percentage is measured against and how <c>null</c> is handled is not visible in this file - confirm.
/// </remarks>
public int? IndexRefreshThresholdPercentage { get; set; }
/// <summary>
/// Threshold percentage to rebuild index instead of updating it one by one.
/// </summary>
/// <remarks>
/// NOTE(review): nullable; what the percentage is measured against and how <c>null</c> is handled is not visible in this file - confirm.
/// </remarks>
public int? IndexRebuildThresholdPercentage { get; set; }
/// <summary>
/// Callback for the moment data is staged in the database but no changes are made yet.
/// Not invoked by this class itself.
/// </summary>
public Action<BulkLoadContext> OnDataStaged { get; set; }
/// <summary>
/// Callback for the moment data is loaded in the database.
/// Not invoked by this class itself.
/// </summary>
public Action<BulkLoadContext> OnDataLoaded { get; set; }
/// <summary>
/// Callback for the moment data is indexed; also receives a collection of item changes.
/// Not invoked by this class itself.
/// </summary>
public Action<BulkLoadContext, ICollection<ItemChange>> OnDataIndexed { get; set; }
/// <summary>
/// Item changes tracked on this context. Starts out empty; nothing in this class adds to it,
/// so it is presumably filled by the loader - confirm against the caller.
/// </summary>
public LinkedList<ItemChange> ItemChanges { get; } = new LinkedList<ItemChange>();
/// <summary>
/// Creates a context for the given database, with cache removal and index updates enabled
/// and a logger obtained from <c>LoggerFactory</c>.
/// </summary>
/// <param name="database">Name of the database to load into; must not be null or empty.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="database"/> is null or empty.</exception>
protected internal BulkLoadContext(string database)
{
    if (string.IsNullOrEmpty(database))
    {
        throw new ArgumentNullException(nameof(database));
    }

    Database = database;

    // Options that are on unless the caller switches them off.
    UpdateIndexes = true;
    RemoveItemsFromCaches = true;

    Log = LoggerFactory.GetLogger(typeof(BulkLoader));
}
/// <summary>
/// Enables item id lookups and sets <see cref="Destination"/> to the given item.
/// </summary>
/// <param name="itemId">Id of the destination item.</param>
/// <param name="itemPath">Path of the destination item.</param>
public void LookupItemsIn(Guid itemId, string itemPath)
{
    LookupItemIds = true;

    var destination = new ItemReference(itemId, itemPath);
    Destination = destination;
}

/// <summary>
/// Enables item id lookups and sets <see cref="Destination"/> to the id and path of the given item.
/// </summary>
/// <param name="item">Destination item.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="item"/> is <c>null</c>.</exception>
public void LookupItemsIn(Item item)
{
    if (item == null)
    {
        throw new ArgumentNullException(nameof(item));
    }

    var id = item.ID.Guid;
    var path = item.Paths.Path;
    LookupItemsIn(id, path);
}
/// <summary>
/// Decides whether the given index should be updated.
/// </summary>
/// <param name="searchIndex">Index under consideration.</param>
/// <param name="searchIndexSummary">Summary of that index; only consulted when the index was not explicitly requested.</param>
/// <returns>
/// <c>true</c> when the index is part of the explicitly assigned indexes, or when it already contains documents.
/// </returns>
public bool ShouldUpdateIndex(ISearchIndex searchIndex, ISearchIndexSummary searchIndexSummary)
{
    // Explicitly requested indexes are always updated.
    var explicitlyRequested = _indexesToUpdate != null && _indexesToUpdate.Contains(searchIndex);

    // Otherwise skip empty indexes: updating an empty index would trigger a rebuild.
    return explicitlyRequested || searchIndexSummary.NumberOfDocuments > 0;
}
#region Stage results and feedback
private readonly Dictionary<Stage, StageResult> _stageResults = new Dictionary<Stage, StageResult>();

/// <summary>
/// Whether at least one stage has a recorded result that includes <c>StageResult.Failed</c>.
/// </summary>
public bool AnyStageFailed
{
    get { return _stageResults.Values.Any(recorded => recorded.HasFlag(StageResult.Failed)); }
}
/// <summary>
/// Records a result for a stage. Results for the same stage accumulate: a new result is
/// OR-ed into whatever was recorded before instead of replacing it.
/// </summary>
/// <param name="stage">Stage the result belongs to.</param>
/// <param name="result">Result to record.</param>
protected virtual void AddStageResult(Stage stage, StageResult result)
{
    StageResult recorded;
    if (_stageResults.TryGetValue(stage, out recorded))
    {
        _stageResults[stage] = recorded | result;
    }
    else
    {
        _stageResults[stage] = result;
    }
}
/// <summary>
/// Records that the given stage succeeded.
/// </summary>
/// <param name="stage">Stage that succeeded.</param>
public virtual void StageSucceeded(Stage stage) =>
    AddStageResult(stage, StageResult.Succeeded);
/// <summary>
/// Records that the given stage failed, logs the failure at fatal level and
/// stores the message in <see cref="FailureMessage"/>.
/// </summary>
/// <param name="stage">Stage that failed.</param>
/// <param name="ex">Exception that caused the failure, or <c>null</c> when there is none.</param>
/// <param name="message">Description of the failure.</param>
public virtual void StageFailed(Stage stage, Exception ex, string message)
{
    AddStageResult(stage, StageResult.Failed);

    // When an exception is available, append its type, message and stack trace to the log entry.
    var logEntry = ex == null
        ? message
        : message +
          $"\nException type: {ex.GetType().Name}\nException message: {ex.Message}\nStack trace: {ex.StackTrace}";
    Log.Fatal(logEntry);

    // Only the plain message is kept as the failure message; exception details go to the log.
    FailureMessage = message;
}

/// <summary>
/// Records that the given stage failed when there is no exception to report.
/// </summary>
/// <param name="stage">Stage that failed.</param>
/// <param name="message">Description of the failure.</param>
public virtual void StageFailed(Stage stage, string message) =>
    StageFailed(stage, null, message);
/// <summary>
/// Logs, at warning level, that an item is being skipped.
/// </summary>
/// <param name="message">Reason for skipping; " Skipping item." is appended to it.</param>
public virtual void SkipItemWarning(string message)
{
    var logEntry = message + " Skipping item.";
    Log.Warn(logEntry);
}
/// <summary>
/// Logs, at debug level, that an item is being skipped.
/// </summary>
/// <param name="message">Reason for skipping; " Skipping item." is appended to it.</param>
public virtual void SkipItemDebug(string message)
{
    var logEntry = message + " Skipping item.";
    Log.Debug(logEntry);
}
#endregion
#region Tracked item data
/// <summary>
/// Tracks path and template info of the bulk item within the context,
/// so that we can check whether bucketing is still needed, or do lookups by path.
/// Doesn't keep a reference to the item, so that we're not too memory intensive.
/// </summary>
/// <param name="item">Item to track.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="item"/> is <c>null</c>.</exception>
public virtual void TrackPathAndTemplateInfo(BulkLoadItem item)
{
    // Fail fast with a descriptive exception, before any cache state is created for a null item.
    if (item == null)
    {
        throw new ArgumentNullException(nameof(item));
    }

    var itemId = item.Id;

    // Cache template id per item.
    GetTemplateCache()[itemId] = item.TemplateId;

    // Cache the item path and the lookup path; blank paths are ignored.
    // The path cache is only requested when there is something to store in it.
    foreach (var path in new[] { item.ItemPath, item.ItemLookupPath })
    {
        if (!string.IsNullOrWhiteSpace(path))
        {
            GetPathCache()[path] = itemId;
        }
    }
}
/// <summary>
/// Gets the path-to-item-id cache from the context state, creating it on first use.
/// Paths are compared case-insensitively (ordinal).
/// </summary>
private IDictionary<string, Guid> GetPathCache()
{
    const string stateKey = "Transform.PathCache";
    return GetOrAddState(stateKey, () => new Dictionary<string, Guid>(StringComparer.OrdinalIgnoreCase));
}
/// <summary>
/// Gets the item-id-to-template-id cache from the context state, creating it on first use.
/// </summary>
private IDictionary<Guid, Guid> GetTemplateCache()
{
    const string stateKey = "Import.TemplateCache";
    return GetOrAddState(stateKey, () => new Dictionary<Guid, Guid>());
}
/// <summary>
/// Looks up the id of an item that was tracked under the given path.
/// </summary>
/// <param name="itemPath">Item path or lookup path to resolve.</param>
/// <returns>Id of the tracked item, or <c>null</c> when the path is blank or unknown.</returns>
public virtual Guid? GetProcessedPath(string itemPath)
{
    if (string.IsNullOrWhiteSpace(itemPath))
    {
        return null;
    }

    Guid trackedId;
    if (GetPathCache().TryGetValue(itemPath, out trackedId))
    {
        return trackedId;
    }

    return null;
}
/// <summary>
/// Looks up the template id that was tracked for the given item.
/// </summary>
/// <param name="itemId">Id of the item.</param>
/// <returns>Template id of the tracked item, or <c>null</c> when the item was not tracked.</returns>
public virtual Guid? GetProcessedItemTemplateId(Guid itemId)
{
    Guid templateId;
    if (GetTemplateCache().TryGetValue(itemId, out templateId))
    {
        return templateId;
    }

    return null;
}
#endregion
#region Additional state
private readonly Dictionary<string, object> _state =
    new Dictionary<string, object>(StringComparer.OrdinalIgnoreCase);

/// <summary>
/// Reads a piece of state from the context without creating it.
/// </summary>
/// <typeparam name="T">Type the stored state is cast to.</typeparam>
/// <param name="key">Key of the state; keys are compared case-insensitively (ordinal).</param>
/// <param name="defaultValue">Value to return when nothing is stored under the key.</param>
/// <returns>The stored state cast to <typeparamref name="T"/>, or <paramref name="defaultValue"/> when absent.</returns>
/// <remarks>Not thread safe.</remarks>
public T GetState<T>(string key, T defaultValue = default(T))
{
    object stored;
    return _state.TryGetValue(key, out stored)
        ? (T)stored
        : defaultValue;
}
/// <summary>
/// Reads a piece of state from the context, creating and storing it first when it is absent.
/// </summary>
/// <typeparam name="T">Type of the state.</typeparam>
/// <param name="key">Key of the state; keys are compared case-insensitively (ordinal).</param>
/// <param name="stateFactory">Factory that creates the state; only invoked when nothing is stored under the key.</param>
/// <returns>The stored state, or the newly created and stored state.</returns>
/// <remarks>Not thread safe.</remarks>
public T GetOrAddState<T>(string key, Func<T> stateFactory)
{
    object stored;
    if (_state.TryGetValue(key, out stored))
    {
        return (T)stored;
    }

    var created = stateFactory();
    _state[key] = created;
    return created;
}
#endregion
}
}