From f2f246a61a4c654ef5ff65a530c2c0529935a304 Mon Sep 17 00:00:00 2001 From: Sebastian Thomschke Date: Tue, 20 Aug 2024 15:34:19 +0200 Subject: [PATCH 1/2] fix: DocumentInputStream does not handle surrogate pairs correctly --- org.eclipse.lsp4e.test/pom.xml | 2 +- .../test/internal/CharsInputStreamTest.java | 127 ++++++++ .../internal/DocumentInputStreamTest.java | 152 +++++++++ .../eclipse/lsp4e/DocumentInputStream.java | 36 +-- .../lsp4e/internal/CharsInputStream.java | 295 ++++++++++++++++++ .../eclipse/lsp4e/internal/DocumentUtil.java | 20 +- 6 files changed, 602 insertions(+), 30 deletions(-) create mode 100644 org.eclipse.lsp4e.test/src/org/eclipse/lsp4e/test/internal/CharsInputStreamTest.java create mode 100644 org.eclipse.lsp4e.test/src/org/eclipse/lsp4e/test/internal/DocumentInputStreamTest.java create mode 100644 org.eclipse.lsp4e/src/org/eclipse/lsp4e/internal/CharsInputStream.java diff --git a/org.eclipse.lsp4e.test/pom.xml b/org.eclipse.lsp4e.test/pom.xml index 42684cb2c..7192c33da 100644 --- a/org.eclipse.lsp4e.test/pom.xml +++ b/org.eclipse.lsp4e.test/pom.xml @@ -42,7 +42,7 @@ true true 1200 - -Xms1g -Xmx1g -Djava.util.logging.config.file=${project.basedir}/src/jul.properties ${ui.test.vmargs} ${os-jvm-flags} + -Dfile.encoding=${project.build.sourceEncoding} -Xms1g -Xmx1g -Djava.util.logging.config.file=${project.basedir}/src/jul.properties ${ui.test.vmargs} ${os-jvm-flags} diff --git a/org.eclipse.lsp4e.test/src/org/eclipse/lsp4e/test/internal/CharsInputStreamTest.java b/org.eclipse.lsp4e.test/src/org/eclipse/lsp4e/test/internal/CharsInputStreamTest.java new file mode 100644 index 000000000..80f3c4fea --- /dev/null +++ b/org.eclipse.lsp4e.test/src/org/eclipse/lsp4e/test/internal/CharsInputStreamTest.java @@ -0,0 +1,127 @@ +/******************************************************************************* + * Copyright (c) 2024 Sebastian Thomschke and others. 
+ * This program and the accompanying materials are made + * available under the terms of the Eclipse Public License 2.0 + * which is available at https://www.eclipse.org/legal/epl-2.0/ + * + * SPDX-License-Identifier: EPL-2.0 + * + * Contributors: + * Sebastian Thomschke - initial implementation + *******************************************************************************/ +package org.eclipse.lsp4e.test.internal; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.junit.Assert.*; + +import java.io.IOException; +import java.util.ArrayList; + +import org.eclipse.lsp4e.internal.CharsInputStream; +import org.junit.Test; + +public class CharsInputStreamTest { + + private static final String TEST_ASCII = "Hello, World!"; + + private static final String EMOJI = "😊"; + private static final int EMOJI_BYTES_LEN = EMOJI.getBytes(UTF_8).length; + private static final String JAPANESE = "こんにちは"; + private static final String TEST_UNICODE = EMOJI + JAPANESE; + private static final int TEST_UNICODE_BYTES_LEN = TEST_UNICODE.getBytes(UTF_8).length; + + @Test + public void testAvailable() throws IOException { + try (var is = new CharsInputStream(TEST_ASCII)) { + assertEquals(TEST_ASCII.length(), is.available()); + final byte[] buffer = new byte[4]; + is.read(buffer); + assertEquals(TEST_ASCII.length() - 4, is.available()); + is.readAllBytes(); + assertEquals(0, is.available()); + } + + try (var is = new CharsInputStream(TEST_UNICODE)) { + assertTrue(is.available() > 0); + is.read(new byte[10]); + assertTrue(is.available() > 0); + is.readAllBytes(); + assertEquals(0, is.available()); + } + } + + @Test + public void testEndOfStream() throws IOException { + try (var is = new CharsInputStream(TEST_UNICODE)) { + is.skip(Long.MAX_VALUE); + assertEquals(-1, is.read()); + } + } + + @Test + public void testReadEachByte() throws IOException { + try (var is = new CharsInputStream(TEST_UNICODE)) { + final var bytesRead = new ArrayList(); + int b; + while ((b = 
is.read()) != -1) { + bytesRead.add((byte) b); + } + + final byte[] byteArray = new byte[bytesRead.size()]; + for (int i = 0; i < bytesRead.size(); i++) { + byteArray[i] = bytesRead.get(i); + } + assertEquals(TEST_UNICODE, new String(byteArray, UTF_8)); + } + } + + @Test + public void testReadIntoByteArray() throws IOException { + final byte[] buffer = new byte[1024]; // Buffer to read a portion of the text + + try (var is = new CharsInputStream(TEST_UNICODE)) { + final int bytesRead = is.read(buffer, 0, buffer.length); + + assertEquals(TEST_UNICODE, new String(buffer, 0, bytesRead, UTF_8)); + } + } + + @Test + public void testSkip() throws IOException { + try (var is = new CharsInputStream(TEST_UNICODE)) { + // skip emoji + final long skipped = is.skip(EMOJI_BYTES_LEN); + assertEquals(EMOJI_BYTES_LEN, skipped); + + final byte[] japanese = new byte[TEST_UNICODE_BYTES_LEN]; + final int bytesRead = is.read(japanese); + + assertEquals(JAPANESE, new String(japanese, 0, bytesRead, UTF_8)); + } + } + + @Test + public void testHighSurrogateAtEndOfInput() throws IOException { + final char[] invalidSequence = { 'A', '\uD800' }; // valid char followed by an isolated high surrogate + try (var is = new CharsInputStream(new String(invalidSequence), UTF_8)) { + final byte[] result = is.readAllBytes(); + final String output = new String(result, UTF_8); + + // the high surrogate at the end should be replaced by the + // Unicode replacement char + assertEquals("A" + CharsInputStream.UNICODE_REPLACEMENT_CHAR, output); + } + } + + @Test + public void testHighSurrogateWithoutLowSurrogate() throws IOException { + final char[] invalidSequence = { '\uD800', 'A' }; // \uD800 is a high surrogate, followed by 'A' + try (var is = new CharsInputStream(new String(invalidSequence), UTF_8)) { + final byte[] result = is.readAllBytes(); + final String output = new String(result, UTF_8); + + // the invalid surrogate pair should be replaced by the Unicode replacement char + 
assertEquals(CharsInputStream.UNICODE_REPLACEMENT_CHAR + "A", output); + } + } +} diff --git a/org.eclipse.lsp4e.test/src/org/eclipse/lsp4e/test/internal/DocumentInputStreamTest.java b/org.eclipse.lsp4e.test/src/org/eclipse/lsp4e/test/internal/DocumentInputStreamTest.java new file mode 100644 index 000000000..dc1b35854 --- /dev/null +++ b/org.eclipse.lsp4e.test/src/org/eclipse/lsp4e/test/internal/DocumentInputStreamTest.java @@ -0,0 +1,152 @@ +/******************************************************************************* + * Copyright (c) 2024 Sebastian Thomschke and others. + * This program and the accompanying materials are made + * available under the terms of the Eclipse Public License 2.0 + * which is available at https://www.eclipse.org/legal/epl-2.0/ + * + * SPDX-License-Identifier: EPL-2.0 + * + * Contributors: + * Sebastian Thomschke - initial implementation + *******************************************************************************/ +package org.eclipse.lsp4e.test.internal; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.junit.Assert.*; + +import java.io.IOException; +import java.util.ArrayList; + +import org.eclipse.core.runtime.CoreException; +import org.eclipse.jface.text.IDocument; +import org.eclipse.lsp4e.DocumentInputStream; +import org.eclipse.lsp4e.LSPEclipseUtils; +import org.eclipse.lsp4e.internal.CharsInputStream; +import org.eclipse.lsp4e.test.utils.AbstractTestWithProject; +import org.eclipse.lsp4e.test.utils.TestUtils; +import org.junit.Before; +import org.junit.Test; + +public class DocumentInputStreamTest extends AbstractTestWithProject { + + private static final String TEST_ASCII = "Hello, World!"; + + private static final String EMOJI = "😊"; + private static final int EMOJI_BYTES_LEN = EMOJI.getBytes(UTF_8).length; + private static final String JAPANESE = "こんにちは"; + private static final String TEST_UNICODE = EMOJI + JAPANESE; + private static final int TEST_UNICODE_BYTES_LEN = 
TEST_UNICODE.getBytes(UTF_8).length; + + private IDocument document; + + @Before + public void setUp() throws CoreException { + final var testFile = TestUtils.createUniqueTestFile(project, TEST_UNICODE); + document = LSPEclipseUtils.getDocument(testFile); + } + + @Test + public void testAvailable() throws IOException { + document.set(TEST_ASCII); + try (var is = new DocumentInputStream(document)) { + assertEquals(UTF_8, is.getCharset()); + assertEquals(TEST_ASCII.length(), is.available()); + final byte[] buffer = new byte[4]; + is.read(buffer); + assertEquals(TEST_ASCII.length() - 4, is.available()); + is.readAllBytes(); + assertEquals(0, is.available()); + } + + document.set(TEST_UNICODE); + try (var is = new DocumentInputStream(document)) { + assertEquals(UTF_8, is.getCharset()); + assertTrue(is.available() > 0); + is.read(new byte[10]); + assertTrue(is.available() > 0); + is.readAllBytes(); + assertEquals(0, is.available()); + } + } + + @Test + public void testEndOfStream() throws IOException { + try (var is = new DocumentInputStream(document)) { + assertEquals(UTF_8, is.getCharset()); + is.skip(Long.MAX_VALUE); + assertEquals(-1, is.read()); + } + } + + @Test + public void testReadEachByte() throws IOException { + try (var is = new DocumentInputStream(document)) { + assertEquals(UTF_8, is.getCharset()); + final var bytesRead = new ArrayList(); + int b; + while ((b = is.read()) != -1) { + bytesRead.add((byte) b); + } + + final byte[] byteArray = new byte[bytesRead.size()]; + for (int i = 0; i < bytesRead.size(); i++) { + byteArray[i] = bytesRead.get(i); + } + assertEquals(TEST_UNICODE, new String(byteArray, UTF_8)); + } + } + + @Test + public void testReadIntoByteArray() throws IOException { + final byte[] buffer = new byte[1024]; // Buffer to read a portion of the text + + try (var is = new DocumentInputStream(document)) { + assertEquals(UTF_8, is.getCharset()); + final int bytesRead = is.read(buffer, 0, buffer.length); + + assertEquals(TEST_UNICODE, new 
String(buffer, 0, bytesRead, UTF_8)); + } + } + + @Test + public void testSkip() throws IOException { + try (var is = new DocumentInputStream(document)) { + assertEquals(UTF_8, is.getCharset()); + // skip emoji + final long skipped = is.skip(EMOJI_BYTES_LEN); + assertEquals(EMOJI_BYTES_LEN, skipped); + + final byte[] japanese = new byte[TEST_UNICODE_BYTES_LEN]; + final int bytesRead = is.read(japanese); + + assertEquals(JAPANESE, new String(japanese, 0, bytesRead, UTF_8)); + } + } + + @Test + public void testHighSurrogateAtEndOfInput() throws IOException { + document.set(new String(new char[] { 'A', '\uD800' })); // valid char followed by an isolated high surrogate + try (var is = new DocumentInputStream(document)) { + assertEquals(UTF_8, is.getCharset()); + final byte[] result = is.readAllBytes(); + final String output = new String(result, UTF_8); + + // the high surrogate at the end should be replaced by the + // Unicode replacement char + assertEquals("A" + CharsInputStream.UNICODE_REPLACEMENT_CHAR, output); + } + } + + @Test + public void testHighSurrogateWithoutLowSurrogate() throws IOException { + document.set(new String(new char[] { '\uD800', 'A' })); // \uD800 is a high surrogate, followed by 'A' + try (var is = new DocumentInputStream(document)) { + assertEquals(UTF_8, is.getCharset()); + final byte[] result = is.readAllBytes(); + final String output = new String(result, UTF_8); + + // the invalid surrogate pair should be replaced by the Unicode replacement char + assertEquals(CharsInputStream.UNICODE_REPLACEMENT_CHAR + "A", output); + } + } +} diff --git a/org.eclipse.lsp4e/src/org/eclipse/lsp4e/DocumentInputStream.java b/org.eclipse.lsp4e/src/org/eclipse/lsp4e/DocumentInputStream.java index 61d434d3c..f1b856cbc 100644 --- a/org.eclipse.lsp4e/src/org/eclipse/lsp4e/DocumentInputStream.java +++ b/org.eclipse.lsp4e/src/org/eclipse/lsp4e/DocumentInputStream.java @@ -1,5 +1,5 @@ /******************************************************************************* - 
* Copyright (c) 2019 Red Hat Inc. and others. + * Copyright (c) 2024 Sebastian Thomschke and others. * This program and the accompanying materials are made * available under the terms of the Eclipse Public License 2.0 * which is available at https://www.eclipse.org/legal/epl-2.0/ @@ -7,37 +7,17 @@ * SPDX-License-Identifier: EPL-2.0 * * Contributors: - * Mickael Istria (Red Hat Inc.) - initial implementation + * Sebastian Thomschke - initial implementation *******************************************************************************/ - package org.eclipse.lsp4e; -import java.io.IOException; -import java.io.InputStream; - -import org.eclipse.jface.text.BadLocationException; import org.eclipse.jface.text.IDocument; +import org.eclipse.lsp4e.internal.CharsInputStream; +import org.eclipse.lsp4e.internal.DocumentUtil; -final class DocumentInputStream extends InputStream { - private int index = 0; - private final IDocument document; +public final class DocumentInputStream extends CharsInputStream { - DocumentInputStream(IDocument document) { - this.document = document; + public DocumentInputStream(final IDocument doc) { + super(doc::getChar, doc::getLength, DocumentUtil.getCharset(doc)); } - - @Override - public int read() throws IOException { - if (index < document.getLength()) { - try { - char res = document.getChar(index); - index++; - return res; - } catch (BadLocationException e) { - throw new IOException(e); - } - } - return -1; - } - -} \ No newline at end of file +} diff --git a/org.eclipse.lsp4e/src/org/eclipse/lsp4e/internal/CharsInputStream.java b/org.eclipse.lsp4e/src/org/eclipse/lsp4e/internal/CharsInputStream.java new file mode 100644 index 000000000..eaad99a23 --- /dev/null +++ b/org.eclipse.lsp4e/src/org/eclipse/lsp4e/internal/CharsInputStream.java @@ -0,0 +1,295 @@ +/******************************************************************************* + * Copyright (c) 2024 Sebastian Thomschke and others. 
+ * This program and the accompanying materials are made
+ * available under the terms of the Eclipse Public License 2.0
+ * which is available at https://www.eclipse.org/legal/epl-2.0/
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ * Contributors:
+ * Sebastian Thomschke - initial implementation
+ *******************************************************************************/
+package org.eclipse.lsp4e.internal;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.util.Objects;
+import java.util.function.IntSupplier;
+
+/**
+ * An {@link InputStream} that lazily encodes indexed characters (e.g. the
+ * content of a {@link CharSequence}) into the bytes of a given {@link Charset}.
+ * Surrogate pairs are never split between two encoding passes, unpaired
+ * surrogates are replaced by {@link #UNICODE_REPLACEMENT_CHAR}, and characters
+ * the charset cannot represent make reading fail with a
+ * {@link CharacterCodingException}. Instances are not thread-safe.
+ */
+public class CharsInputStream extends InputStream {
+
+	/**
+	 * Functional interface for supplying characters at a specified index.
+	 * Implementations can define how characters are fetched.
+	 */
+	@FunctionalInterface
+	public interface CharsSupplier {
+		char charAt(int index) throws Exception;
+	}
+
+	private enum EncoderState {
+		/**
+		 * The {@link #encoder} is actively encoding characters into bytes. This is the
+		 * initial state of the encoder.
+		 */
+		ENCODING, //
+
+		/**
+		 * The {@link #encoder} has finished processing all characters and is now
+		 * flushing any remaining bytes in its internal buffer.
+		 */
+		FLUSHING, //
+
+		/**
+		 * The {@link #encoder} has completed both the encoding and flushing processes.
+		 * No more data is left to be read from the encoder.
+		 */
+		DONE
+	}
+
+	public static final char UNICODE_REPLACEMENT_CHAR = '\uFFFD';
+
+	/** Default number of code points (single chars or surrogate pairs) encoded per pass. */
+	private static final int DEFAULT_BUFFER_SIZE = 512;
+	private static final int EOF = -1;
+
+	private final int bufferSize;
+	private final CharBuffer charBuffer;
+	private final ByteBuffer byteBuffer;
+	private final CharsetEncoder encoder;
+	private EncoderState encoderState = EncoderState.ENCODING;
+
+	private int charIndex = 0;
+	private final CharsSupplier chars;
+	private final IntSupplier charsLength;
+
+	public CharsInputStream(final CharSequence chars) {
+		this(chars, Charset.defaultCharset());
+	}
+
+	public CharsInputStream(final CharSequence chars, final Charset charset) {
+		this(chars, charset, DEFAULT_BUFFER_SIZE);
+	}
+
+	public CharsInputStream(final CharSequence chars, final Charset charset, final int bufferSize) {
+		this(chars::charAt, chars::length, charset, bufferSize);
+	}
+
+	public CharsInputStream(final CharsSupplier chars, final IntSupplier charsLength) {
+		this(chars, charsLength, Charset.defaultCharset());
+	}
+
+	/**
+	 * @param chars
+	 *            function to access indexed chars.
+	 * @param charsLength
+	 *            function to get the number of indexed chars provided by the
+	 *            {@code chars} parameter.
+	 */
+	public CharsInputStream(final CharsSupplier chars, final IntSupplier charsLength, final Charset charset) {
+		this(chars, charsLength, charset, DEFAULT_BUFFER_SIZE);
+	}
+
+	/**
+	 * @param chars
+	 *            function to access indexed chars.
+	 * @param charsLength
+	 *            function to get the number of indexed chars provided by the
+	 *            {@code chars} parameter.
+	 * @param bufferSize
+	 *            number of code points (single chars or surrogate pairs) to
+	 *            encode at once.
+	 * @throws IllegalArgumentException
+	 *             if {@code bufferSize} is smaller than 1.
+	 */
+	public CharsInputStream(final CharsSupplier chars, final IntSupplier charsLength, final Charset charset,
+			final int bufferSize) {
+		if (bufferSize < 1)
+			throw new IllegalArgumentException("[bufferSize] must be 1 or larger"); //$NON-NLS-1$
+		encoder = charset.newEncoder();
+
+		this.bufferSize = bufferSize;
+		charBuffer = CharBuffer.allocate(bufferSize * 2); // each code point may occupy 2 chars (surrogate pair)
+		// worst-case output size (incl. BOM/escape sequences) so that an encoding pass never overflows
+		byteBuffer = ByteBuffer.allocate((int) Math.ceil(charBuffer.capacity() * (double) encoder.maxBytesPerChar()));
+		byteBuffer.flip();
+		charBuffer.flip();
+
+		this.chars = chars;
+		this.charsLength = charsLength;
+	}
+
+	/**
+	 * @return the number of already encoded bytes if there are any, otherwise the
+	 *         number of chars not yet encoded (a non-negative estimate).
+	 */
+	@Override
+	public int available() {
+		final int remaining = byteBuffer.remaining();
+		return remaining == 0 ? Math.max(0, charsLength.getAsInt() - charIndex) : remaining;
+	}
+
+	/**
+	 * This method is called by {@link #refillByteBuffer()} to encode characters
+	 * from the given {@link CharBuffer} into bytes and stores them in the
+	 * {@link #byteBuffer}.
+	 *
+	 * <p>
+	 * The method can be used either to encode characters in the middle of input
+	 * (with {@code isEndOfInput=false}) or to finalize the encoding process at the
+	 * end of input (with {@code isEndOfInput=true}).
+	 * </p>
+	 *
+	 * @param in
+	 *            the {@link CharBuffer} containing characters to encode.
+	 * @param isEndOfInput
+	 *            if {@code true}, signals that no more input will be provided,
+	 *            allowing the encoder to complete its final encoding steps.
+	 */
+	private void encodeChars(final CharBuffer in, final boolean isEndOfInput) throws CharacterCodingException {
+		byteBuffer.clear();
+		final CoderResult result = encoder.encode(in, byteBuffer, isEndOfInput);
+		byteBuffer.flip();
+		if (result.isError())
+			result.throwException();
+	}
+
+	/**
+	 * Flushes the remaining bytes from the encoder to the {@link #byteBuffer}.
+	 *
+	 * <p>
+	 * This method is called by {@link #refillByteBuffer()} when all characters have
+	 * been processed, and the encoder needs to output any remaining bytes. It
+	 * transitions the encoder state from {@link EncoderState#ENCODING} to
+	 * {@link EncoderState#FLUSHING}, and eventually to {@link EncoderState#DONE}
+	 * once all bytes have been flushed.
+	 * </p>
+	 *
+	 * @return {@code true} if there are still bytes left in the {@link #byteBuffer}
+	 *         after flushing, or if the encoder still has more bytes to flush;
+	 *         {@code false} if the flush is complete and no bytes remain.
+	 */
+	private boolean flushEncoder() throws IOException {
+		if (encoderState == EncoderState.DONE)
+			return false;
+
+		encoderState = EncoderState.FLUSHING;
+
+		byteBuffer.clear();
+		final CoderResult result = encoder.flush(byteBuffer);
+		byteBuffer.flip();
+
+		if (result.isOverflow()) {
+			// the byteBuffer has been filled, but there are more bytes to be flushed.
+			// after reading all available bytes from byteBuffer, flushEncoder() needs to
+			// be called again to process the remaining data.
+			return true;
+		}
+
+		if (result.isError())
+			result.throwException();
+
+		encoderState = EncoderState.DONE;
+		return byteBuffer.hasRemaining();
+	}
+
+	public Charset getCharset() {
+		return encoder.charset();
+	}
+
+	@Override
+	public int read() throws IOException {
+		if (!byteBuffer.hasRemaining() && !refillByteBuffer())
+			return EOF;
+		return byteBuffer.get() & 0xFF; // next byte as an unsigned integer (0 to 255)
+	}
+
+	@Override
+	public int read(final byte[] buf, final int off, final int bytesToRead) throws IOException {
+		Objects.checkFromIndexSize(off, bytesToRead, buf.length);
+		if (bytesToRead == 0)
+			return 0;
+
+		int bytesRead = 0;
+		int bytesReadable = byteBuffer.remaining();
+
+		while (bytesRead < bytesToRead) {
+			if (bytesReadable == 0) {
+				if (refillByteBuffer()) {
+					bytesReadable = byteBuffer.remaining();
+				} else
+					return bytesRead == 0 ? EOF : bytesRead;
+			}
+
+			final int bytesToReadNow = Math.min(bytesToRead - bytesRead, bytesReadable);
+			byteBuffer.get(buf, off + bytesRead, bytesToReadNow);
+			bytesRead += bytesToReadNow;
+			bytesReadable -= bytesToReadNow;
+		}
+
+		return bytesRead;
+	}
+
+	/**
+	 * Refills the {@link #byteBuffer} by reading characters from the character
+	 * supplier, encoding them, and storing the resulting bytes into the
+	 * {@link #byteBuffer}.
+	 *
+	 * @return {@code true} if the buffer was successfully refilled and has bytes
+	 *         available for reading, {@code false} if the end of the stream is
+	 *         reached and there are no more bytes to read.
+	 */
+	private boolean refillByteBuffer() throws IOException {
+		if (encoderState == EncoderState.DONE)
+			return false;
+
+		if (encoderState == EncoderState.FLUSHING)
+			return flushEncoder();
+
+		final int charsLen = charsLength.getAsInt();
+		if (charIndex >= charsLen) {
+			// all chars consumed: finalize encoding, then switch to flushing
+			encodeChars(CharBuffer.allocate(0), true /* signal EOF */);
+			return flushEncoder();
+		}
+
+		charBuffer.clear();
+		try {
+			for (int i = 0; i < bufferSize && charIndex < charsLen; i++) {
+				final char nextChar = chars.charAt(charIndex++);
+				// only a high surrogate directly followed by a low surrogate is valid
+				final char lowSurrogate = Character.isHighSurrogate(nextChar) && charIndex < charsLen //
+						? chars.charAt(charIndex)
+						: 0;
+				if (Character.isLowSurrogate(lowSurrogate)) {
+					charIndex++;
+					charBuffer.put(nextChar).put(lowSurrogate);
+				} else {
+					// unpaired surrogates fall back to the replacement character
+					charBuffer.put(Character.isSurrogate(nextChar) ? UNICODE_REPLACEMENT_CHAR : nextChar);
+				}
+			}
+		} catch (final Exception ex) {
+			throw new IOException(ex); // failure of the chars supplier
+		}
+		charBuffer.flip();
+
+		// encode chars into bytes
+		encodeChars(charBuffer, false);
+		return true;
+	}
+}
diff --git a/org.eclipse.lsp4e/src/org/eclipse/lsp4e/internal/DocumentUtil.java b/org.eclipse.lsp4e/src/org/eclipse/lsp4e/internal/DocumentUtil.java index bc7a34b24..4789ff0bf 100644 --- a/org.eclipse.lsp4e/src/org/eclipse/lsp4e/internal/DocumentUtil.java +++ b/org.eclipse.lsp4e/src/org/eclipse/lsp4e/internal/DocumentUtil.java @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2023 Avaloq Group AG. + * Copyright (c) 2023, 2024 Avaloq Group AG and others. * This program and the accompanying materials are made * available under the terms of the Eclipse Public License 2.0 * which is available at https://www.eclipse.org/legal/epl-2.0/ @@ -8,14 +8,19 @@ * * Contributors: * Rubén Porras Campo (Avaloq Group AG) - Initial Implementation + * Sebastian Thomschke - add getCharset method *******************************************************************************/ package org.eclipse.lsp4e.internal; +import java.nio.charset.Charset; + +import org.eclipse.core.filebuffers.ITextFileBuffer; import org.eclipse.core.resources.IFile; import org.eclipse.jdt.annotation.Nullable; import org.eclipse.jface.text.IDocument; import org.eclipse.jface.text.IDocumentExtension4; import org.eclipse.lsp4e.LSPEclipseUtils; +import org.eclipse.lsp4e.LanguageServerPlugin; public final class DocumentUtil { @@ -45,4 +50,17 @@ public static long getDocumentModificationStamp(@Nullable IDocument document) { return IDocumentExtension4.UNKNOWN_MODIFICATION_STAMP; } + public static Charset getCharset(final IDocument document) { + final ITextFileBuffer buffer = LSPEclipseUtils.toBuffer(document); + if (buffer == null) + return Charset.defaultCharset(); + try { + final String charsetName = buffer.getEncoding(); + if (charsetName != null) + return Charset.forName(charsetName); + } catch (final Exception ex) { + LanguageServerPlugin.logError(ex); + } + return Charset.defaultCharset(); + } } From 14d95d4badaa04f2ade3d94eec21610f54bc689a Mon Sep 
17 00:00:00 2001 From: Sebastian Thomschke Date: Tue, 20 Aug 2024 15:35:06 +0200 Subject: [PATCH 2/2] refact: move DocumentInputStream to internal package --- .../eclipse/lsp4e/test/internal/DocumentInputStreamTest.java | 2 +- org.eclipse.lsp4e/src/org/eclipse/lsp4e/LSPEclipseUtils.java | 1 + .../org/eclipse/lsp4e/{ => internal}/DocumentInputStream.java | 4 +--- 3 files changed, 3 insertions(+), 4 deletions(-) rename org.eclipse.lsp4e/src/org/eclipse/lsp4e/{ => internal}/DocumentInputStream.java (85%) diff --git a/org.eclipse.lsp4e.test/src/org/eclipse/lsp4e/test/internal/DocumentInputStreamTest.java b/org.eclipse.lsp4e.test/src/org/eclipse/lsp4e/test/internal/DocumentInputStreamTest.java index dc1b35854..04f8ef7e9 100644 --- a/org.eclipse.lsp4e.test/src/org/eclipse/lsp4e/test/internal/DocumentInputStreamTest.java +++ b/org.eclipse.lsp4e.test/src/org/eclipse/lsp4e/test/internal/DocumentInputStreamTest.java @@ -19,9 +19,9 @@ import org.eclipse.core.runtime.CoreException; import org.eclipse.jface.text.IDocument; -import org.eclipse.lsp4e.DocumentInputStream; import org.eclipse.lsp4e.LSPEclipseUtils; import org.eclipse.lsp4e.internal.CharsInputStream; +import org.eclipse.lsp4e.internal.DocumentInputStream; import org.eclipse.lsp4e.test.utils.AbstractTestWithProject; import org.eclipse.lsp4e.test.utils.TestUtils; import org.junit.Before; diff --git a/org.eclipse.lsp4e/src/org/eclipse/lsp4e/LSPEclipseUtils.java b/org.eclipse.lsp4e/src/org/eclipse/lsp4e/LSPEclipseUtils.java index 277080281..85d606316 100644 --- a/org.eclipse.lsp4e/src/org/eclipse/lsp4e/LSPEclipseUtils.java +++ b/org.eclipse.lsp4e/src/org/eclipse/lsp4e/LSPEclipseUtils.java @@ -90,6 +90,7 @@ import org.eclipse.jface.text.TextSelection; import org.eclipse.jface.viewers.ISelection; import org.eclipse.jface.viewers.ISelectionProvider; +import org.eclipse.lsp4e.internal.DocumentInputStream; import org.eclipse.lsp4e.refactoring.CreateFileChange; import org.eclipse.lsp4e.refactoring.DeleteExternalFile; import 
org.eclipse.lsp4e.refactoring.LSPTextChange; diff --git a/org.eclipse.lsp4e/src/org/eclipse/lsp4e/DocumentInputStream.java b/org.eclipse.lsp4e/src/org/eclipse/lsp4e/internal/DocumentInputStream.java similarity index 85% rename from org.eclipse.lsp4e/src/org/eclipse/lsp4e/DocumentInputStream.java rename to org.eclipse.lsp4e/src/org/eclipse/lsp4e/internal/DocumentInputStream.java index f1b856cbc..b55d32b37 100644 --- a/org.eclipse.lsp4e/src/org/eclipse/lsp4e/DocumentInputStream.java +++ b/org.eclipse.lsp4e/src/org/eclipse/lsp4e/internal/DocumentInputStream.java @@ -9,11 +9,9 @@ * Contributors: * Sebastian Thomschke - initial implementation *******************************************************************************/ -package org.eclipse.lsp4e; +package org.eclipse.lsp4e.internal; import org.eclipse.jface.text.IDocument; -import org.eclipse.lsp4e.internal.CharsInputStream; -import org.eclipse.lsp4e.internal.DocumentUtil; public final class DocumentInputStream extends CharsInputStream {