Skip to content

Commit

Permalink
Implemented file cache for document helper and some minor bug fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Kees committed Oct 14, 2023
1 parent 47cfd4b commit f50b5af
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 155 deletions.
34 changes: 27 additions & 7 deletions ChromiumHtmlToPdfLib/Browser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -743,11 +743,25 @@ internal void WriteToLog(string message)
/// </summary>
public void Dispose()
{
    // Detach the error handlers first; both connections may already be null
    // (for example after an earlier dispose), so never dereference them unguarded.
    if (_pageConnection != null)
        _pageConnection.OnError -= OnOnError;

    if (_browserConnection != null)
        _browserConnection.OnError -= OnOnError;

    // Close before disposing the connections.
    // NOTE(review): presumably CloseAsync still talks over these connections — confirm.
    // Blocking here is unavoidable because IDisposable.Dispose is synchronous.
    CloseAsync(CancellationToken.None).GetAwaiter().GetResult();

    // Dispose each connection exactly once and clear the field so that a
    // repeated Dispose/DisposeAsync call becomes a no-op for that connection.
    if (_pageConnection != null)
    {
        _pageConnection.Dispose();
        _pageConnection = null;
    }

    if (_browserConnection != null)
    {
        _browserConnection.Dispose();
        _browserConnection = null;
    }
}
#endregion

Expand All @@ -759,15 +773,21 @@ public void Dispose()
public async ValueTask DisposeAsync()
{
if (_pageConnection != null)
{
_pageConnection.OnError -= OnOnError;

if (_browserConnection != null)
_browserConnection.OnError -= OnOnError;

await CloseAsync(CancellationToken.None);

if (_pageConnection != null)
{
await _pageConnection.DisposeAsync();
_pageConnection = null;
}

if (_pageConnection != null)
if (_browserConnection != null)
{
_browserConnection.OnError -= OnOnError;
await _browserConnection.DisposeAsync();
_browserConnection = null;
}
Expand Down
1 change: 1 addition & 0 deletions ChromiumHtmlToPdfLib/ChromiumHtmlToPdfLib.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
<PackageReference Include="AngleSharp.Css" Version="0.17.0" />
<PackageReference Include="AngleSharp.Io" Version="1.0.0" />
<PackageReference Include="AngleSharp.Xml" Version="1.0.0" />
<PackageReference Include="FileCache" Version="3.3.0" />
<PackageReference Include="HtmlSanitizer" Version="8.0.723" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="7.0.1" />
<PackageReference Include="Microsoft.Win32.Registry" Version="5.0.0" />
Expand Down
28 changes: 12 additions & 16 deletions ChromiumHtmlToPdfLib/Connection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,9 @@ private async Task ReceiveLoop()
{
// Ignore
}
catch (Exception e)
catch (Exception exception)
{
WebSocketOnError(new ErrorEventArgs(e));
WebSocketOnError(new ErrorEventArgs(exception));
}
finally
{
Expand Down Expand Up @@ -361,29 +361,25 @@ public async Task InternalDisposeAsync()
{
WriteToLog($"Disposing websocket connection to url '{_url}'");

try
{
WebSocketOnClosed(EventArgs.Empty);
}
catch (Exception exception)
{
WriteToLog($"Exception while disposing websocket connection to url '{_url}': {exception.Message}");
}

if (_webSocket.State == WebSocketState.Open)
{
WriteToLog("Closing websocket");

try
{
await _webSocket.CloseAsync(WebSocketCloseStatus.NormalClosure, "Done", default);
WriteToLog("Websocket connection disposed gracefully");
}
finally
catch
{
_receiveLoopCts.Cancel();
_webSocket.Dispose();
WriteToLog("Websocket connection disposed");
// Ignore
}

WriteToLog("Websocket connection closed");

WebSocketOnClosed(EventArgs.Empty);
_receiveLoopCts.Cancel();
_webSocket.Dispose();
WriteToLog("Websocket connection disposed");
}
}
#endregion
Expand Down
11 changes: 5 additions & 6 deletions ChromiumHtmlToPdfLib/Converter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,10 @@ private enum OutputFormat
/// <summary>
/// <see cref="SetDiskCache"/>
/// </summary>
private long _cacheSize;
/// <remarks>
/// Default set to 1GB
/// </remarks>
private long _cacheSize = 1073741824;

/// <summary>
/// <see cref="GetDocumentHelper"/>
Expand Down Expand Up @@ -647,9 +650,7 @@ private void StartChromiumHeadless()
processStartInfo.LoadUserProfile = true;
}
else
{
WriteToLog("Ignoring password and loading user profile because this is only supported on Windows");
}
}

if (!_userProfileSet)
Expand Down Expand Up @@ -687,8 +688,7 @@ private void StartChromiumHeadless()

if (_conversionTimeout.HasValue)
if (!_chromiumWaitEvent.WaitOne(_conversionTimeout.Value))
throw new ChromiumException(
$"A timeout of '{_conversionTimeout.Value}' milliseconds exceeded, could not make a connection to the Chromium dev tools");
throw new ChromiumException($"A timeout of '{_conversionTimeout.Value}' milliseconds exceeded, could not make a connection to the Chromium dev tools");

_chromiumWaitEvent.WaitOne();

Expand Down Expand Up @@ -2610,7 +2610,6 @@ private async Task InternalDisposeAsync()
try
{
WriteToLog($"Closing {BrowserName} browser gracefully");
await _browser.CloseAsync(default);
#if (NETSTANDARD2_0)
_browser.Dispose();
#else
Expand Down
96 changes: 73 additions & 23 deletions ChromiumHtmlToPdfLib/Helpers/DocumentHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Runtime.Caching;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
Expand Down Expand Up @@ -95,12 +96,17 @@ internal class DocumentHelper : IDisposable
/// <summary>
/// The cache folder
/// </summary>
readonly DirectoryInfo _cacheDirectory;
private DirectoryInfo _cacheDirectory;

/// <summary>
/// The cache size
/// </summary>
private readonly long? _cacheSize;
private readonly long _cacheSize;

/// <summary>
/// <see cref="FileCache"/>
/// </summary>
private FileCache _fileCache;
#endregion

#region Properties
Expand Down Expand Up @@ -133,6 +139,35 @@ internal int TimeLeft
return (int)result;
}
}

/// <summary>
///     Returns the <see cref="FileCache"/> used to store downloaded resources, creating it on first access
/// </summary>
/// <remarks>
///     The cache is stored in an "HttpClientHandler" sub folder of the configured cache directory.
///     Once the cache has been created, <see cref="_cacheDirectory"/> points to that sub folder.
///     The cache is limited to <see cref="_cacheSize"/> and items expire after one day without access.
/// </remarks>
private FileCache FileCache
{
    get
    {
        if (_fileCache != null)
            return _fileCache;

        // Use the configured cache directory instead of a hard-coded drive letter, which
        // does not exist on every machine and is meaningless on non-Windows systems.
        // NOTE(review): assumes the constructor assigned _cacheDirectory — confirm.
        var cacheDirectory = new DirectoryInfo(Path.Combine(_cacheDirectory.FullName, "HttpClientHandler"));

        if (!cacheDirectory.Exists)
            cacheDirectory.Create();

        // Static setting of the FileCache type, it has to be set before the instance is created
        FileCache.DefaultCacheManager = FileCacheManagers.Hashed;

        var fileCache = new FileCache(cacheDirectory.FullName)
        {
            MaxCacheSize = _cacheSize,
            AccessTimeout = TimeSpan.FromSeconds(10),
            DefaultPolicy = new CacheItemPolicy { SlidingExpiration = TimeSpan.FromDays(1) },
        };

        // Only update the fields when everything succeeded, otherwise a retry after a
        // failure would nest another "HttpClientHandler" folder inside the first one.
        _cacheDirectory = cacheDirectory;
        _fileCache = fileCache;

        return _fileCache;
    }
}
#endregion

#region Constructor
Expand All @@ -149,7 +184,7 @@ internal int TimeLeft
public DocumentHelper(DirectoryInfo tempDirectory,
bool useCache,
DirectoryInfo cacheDirectory,
long? cacheSize,
long cacheSize,
WebProxy webProxy,
int? imageLoadTimeout,
ILogger logger)
Expand Down Expand Up @@ -916,33 +951,48 @@ private async Task<Stream> OpenDownloadStream(Uri sourceUri, bool checkTimeout =
{
try
{
var httpClientHandler = new HttpClientHandler();

if (_webProxy != null)
httpClientHandler.Proxy = _webProxy;
if (_useCache)
{
var item = FileCache.GetCacheItem(sourceUri.ToString());
if (item is { Value: not null })
{
WriteToLog($"Returning stream for url '{sourceUri}' from the cache");
return new MemoryStream((byte[])item.Value);
}
}

var handler = new FileCacheHandler(httpClientHandler, _cacheDirectory, _cacheSize.Value);
using var client = new HttpClient(handler);
var request = WebRequest.CreateHttp(sourceUri);
var timeLeft = TimeLeft;

//if (_stopwatch != null && checkTimeout)
//{
// if (timeLeft == 0)
// {
// WriteToLog($"Image load has timed out, skipping opening stream to url '{sourceUri}'");
// return null;
// }

// request.Timeout = TimeLeft;
//}

if (_stopwatch != null && checkTimeout)
{
if (timeLeft == 0)
{
WriteToLog($"Image load has timed out, skipping opening stream to url '{sourceUri}'");
return null;
}

request.Timeout = TimeLeft;
}

WriteToLog($"Opening stream to url '{sourceUri}'{(_stopwatch != null ? $" with a timeout of {timeLeft} milliseconds" : string.Empty)}");
var response = await client.GetAsync(sourceUri);

//WriteToLog($"Opened {(response.IsFromCache ? "cached " : string.Empty)}stream to url '{sourceUri}'");
return await response.Content.ReadAsStreamAsync();
var response = await request.GetResponseAsync();

if (!_useCache)
return response.GetResponseStream();

var stream = response.GetResponseStream();

if (stream == null)
return response.GetResponseStream();

var memoryStream = new MemoryStream();
await stream.CopyToAsync(memoryStream);
WriteToLog($"Adding item from url '{sourceUri}' to the cache");
FileCache.Add(sourceUri.ToString(), memoryStream.ToArray(), new CacheItemPolicy { SlidingExpiration = TimeSpan.FromDays(1) });
memoryStream.Position = 0;
return memoryStream;
}
catch (Exception exception)
{
Expand Down
Loading

0 comments on commit f50b5af

Please sign in to comment.