Skip to content

Commit

Permalink
fix (core): Glob Resource Provider now supports e.g. **.java?charset=…
Browse files Browse the repository at this point in the history
…UTF-8
  • Loading branch information
vorburger committed Jun 23, 2024
1 parent 1767bf5 commit 0502e39
Show file tree
Hide file tree
Showing 8 changed files with 215 additions and 54 deletions.
5 changes: 4 additions & 1 deletion java/dev/enola/ai/ollama/OllamaMain.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import dev.enola.common.function.MoreStreams;
import dev.enola.common.io.resource.stream.GlobResourceProvider;
import dev.enola.common.io.resource.stream.GlobResourceProviders;

import io.github.amithkoujalgi.ollama4j.core.OllamaAPI;
import io.github.amithkoujalgi.ollama4j.core.exceptions.OllamaBaseException;
import io.github.amithkoujalgi.ollama4j.core.utils.OptionsBuilder;
Expand Down Expand Up @@ -64,7 +65,9 @@ public static void main(String[] args)
modelName,
"Here is a lot of source code: "
+ context
+ "\nNow please answer the following question about this code, and avoid repeating yourself: "
+ "\n"
+ "Now please answer the following question about this"
+ " code, and avoid repeating yourself: "
+ prompt,
// "Do not provide any reasoning, just answer the question",
options)
Expand Down
141 changes: 122 additions & 19 deletions java/dev/enola/common/io/iri/URIs.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,28 @@
*/
package dev.enola.common.io.iri;

import static java.util.Collections.emptyMap;

import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.net.MediaType;

import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

public final class URIs {
// see also class dev.enola.common.io.iri.IRIs

// TODO Review if all this String instead of URI-based processing could be removed and replaced
// with first encoding invalid special characters in URIs; see the related TBD in
// FileGlobResourceProvider.

// URI Query Parameter Names
private static final String MEDIA_TYPE = "mediaType";
private static final String CHARSET = "charset"; // as in MediaType#CHARSET_ATTRIBUTE
Expand Down Expand Up @@ -64,6 +71,20 @@ public static URI addQuery(URI uri, Map<String, String> parameters) {
return uri;
}

/**
 * Returns a URI with everything except the query parameters of the uri (1st) parameter, but
 * the query parameters of originalUriWithQuery (2nd) parameter - IFF the uri (1st) parameter
 * has no query; otherwise just returns the uri (1st) parameter as-is.
 *
 * <p>The query is copied in its raw (still percent-encoded) form, so that encoded characters
 * such as {@code %26} or {@code %20} survive unchanged, and it is inserted before any
 * {@code #fragment} of the uri (1st) parameter instead of being appended after it.
 *
 * <p>See {@link URIsTest#testAddQueryGivenOriginalUriWithQuery()} for example.
 *
 * @param uri the URI which should receive the query, if it does not already have one
 * @param originalUriWithQuery the URI from which the query is taken
 * @return uri itself if it already has a query or if there is no query to copy; otherwise a
 *     new URI made of uri plus the raw query of originalUriWithQuery
 */
public static URI addQuery(URI uri, URI originalUriWithQuery) {
    // Use the raw query; the decoded one may contain characters which are illegal in a URI,
    // or a decoded '&' which would silently turn one parameter into two.
    var rawQuery = originalUriWithQuery.getRawQuery();
    if (rawQuery == null || rawQuery.isEmpty()) return uri;

    var existingQuery = uri.getRawQuery();
    if (existingQuery != null && !existingQuery.isEmpty()) return uri;

    var text = uri.toString();
    var fragmentStart = text.indexOf('#');
    if (fragmentStart == -1) return URI.create(text + "?" + rawQuery);

    // The query must precede the fragment; anything after '#' belongs to the fragment.
    return URI.create(
            text.substring(0, fragmentStart) + "?" + rawQuery + text.substring(fragmentStart));
}

public static String addQuery(String string, Map<String, String> parameters) {
return addQuery(URI.create(string), parameters).toString();
}
Expand All @@ -90,43 +111,117 @@ private static String encodeQueryParameterValue(String value) {
}

// package-local not public (for now)
static Map<String, String> getQueryMap(URI uri) {
if (uri == null) return Collections.emptyMap();
Map<String, String> map = new HashMap<>();
String query = uri.getQuery();
if (Strings.isNullOrEmpty(query)) {
var part = uri.getSchemeSpecificPart();
var qp = part.indexOf('?');
if (qp > -1) {
var fp = part.indexOf('#');
if (fp == -1) {
query = part.substring(qp + 1);
} else {
query = part.substring(qp + 1, fp);
}
/**
 * Extracts the query (the text between the first '?' and the '#' fragment, or the end) from
 * a URI, or scheme specific part of a URI, given as a String.
 *
 * <p>A '?' which is not followed by any '=' before the fragment is considered to be a Glob
 * wildcard (as in "file:/tmp//?.txt") instead of the start of a query, and a '?' which only
 * appears after the '#' is part of the fragment; both yield the empty string.
 *
 * @param uri the URI text; may be null
 * @return the query without its leading '?', or "" if there is none; never null
 */
static String getQueryString(String uri) {
    if (uri == null) return "";

    // Everything from '#' onwards is the fragment, and never part of the query.
    var fp = uri.indexOf('#');
    var end = fp == -1 ? uri.length() : fp;

    var qp = uri.indexOf('?');
    if (qp == -1 || qp > end) return "";

    var query = uri.substring(qp + 1, end);
    // Handle Glob URIs, like e.g. "file:/tmp//?.txt"
    if (query.indexOf('=') == -1) return "";
    return query;
}

/**
 * Parses the query parameters of a URI given as a String into a Map; this first extracts the
 * query text with {@code getQueryString} and then splits it into key/value pairs.
 */
public static Map<String, String> getQueryMap(String uri) {
    var query = getQueryString(uri);
    return getQueryMapGivenQueryString(query);
}

static Map<String, String> getQueryMapGivenQueryString(String query) {
if (Strings.isNullOrEmpty(query)) {
return Collections.emptyMap();
return emptyMap();
}
AMPERSAND_SPLITTER.split(query).forEach(queryParameter -> put(uri, queryParameter, map));
Map<String, String> map = new HashMap<>();
AMPERSAND_SPLITTER.split(query).forEach(queryParameter -> put(queryParameter, map));
return map;
}

private static void put(URI uri, String queryParameter, Map<String, String> map) {
// package-local not public (for now)
/**
 * Parses the query parameters of a URI into a Map. If {@link URI#getQuery()} has nothing to
 * offer, the query is instead looked for inside the scheme specific part of the URI.
 *
 * @param uri the URI; may be null, which yields an empty Map
 */
static Map<String, String> getQueryMap(URI uri) {
    if (uri == null) {
        return emptyMap();
    }

    var parsedQuery = uri.getQuery();
    var effectiveQuery =
            Strings.isNullOrEmpty(parsedQuery)
                    ? getQueryString(uri.getSchemeSpecificPart())
                    : parsedQuery;
    return getQueryMapGivenQueryString(effectiveQuery);
}

private static void put(String queryParameter, Map<String, String> map) {
var p = queryParameter.indexOf('=');
if (p == -1) {
map.put(queryParameter, null);
} else {
var key = queryParameter.substring(0, p);
if (map.containsKey(key))
throw new IllegalArgumentException(
uri.toString() + " ID URI ?query has duplicate key");
"URI Query Parameter has duplicate key: " + queryParameter);
var value = queryParameter.substring(p + 1);
map.put(key.toLowerCase(), value);
}
}

/**
 * Converts an {@link URI} into a {@link Path}. Implementations of the {@link
 * dev.enola.common.io.resource.Resource} framework use this internally; users typically
 * shouldn't have to call it directly. The {@link dev.enola.common.io.resource.FileResource}
 * has more related background.
 */
public static Path getFilePath(URI uri) {
    // TODO Replace this with return Path.of(uri); but it needs more work...
    // Both for relative file URIs and query parameters and ZIP files.
    // Nota bene: https://stackoverflow.com/q/25032716/421602
    // https://docs.oracle.com/javase/7/docs/technotes/guides/io/fsp/zipfilesystemprovider.html
    // https://docs.oracle.com/en/java/javase/21/docs/api/jdk.zipfs/module-summary.html

    return getFilePath(uri.getScheme(), uri.getAuthority(), getPath(uri));
}

/**
 * Converts a URI given as a String into a {@link Path}. The String is taken apart textually
 * (scheme and path-like component) rather than being parsed as a {@link java.net.URI}.
 *
 * <p>The authority of the URI is not extracted (yet); an empty authority is always used.
 */
public static Path getFilePath(String uri) {
    var scheme = getScheme(uri);
    var authority = ""; // TODO Implement getAuthority(String uri)
    var path = getPath(uri);
    // Pass the local (instead of a 2nd "" literal), so that the TODO above has an effect
    return getFilePath(scheme, authority, path);
}

/**
 * Resolves a path on the file system identified by the scheme and authority. The "file"
 * scheme uses the default file system; any other scheme is looked up among the already
 * existing file systems via {@link FileSystems#getFileSystem(URI)}.
 */
private static Path getFilePath(String scheme, String authority, String path) {
    // TODO Don't hard-code this to file: but use MoreFileSystems.URI_SCHEMAS, somehow...
    if ("file".equals(scheme)) {
        return FileSystems.getDefault().getPath(path);
    }

    final URI fileSystemUri;
    try {
        // TODO Better null or "" for path?
        fileSystemUri = new URI(scheme, authority, "", null, null);
    } catch (URISyntaxException e) {
        // This is rather unexpected...
        throw new IllegalStateException(
                "Failed to create FileSystem Authority URI: " + scheme + ":" + path, e);
    }
    return FileSystems.getFileSystem(fileSystemUri).getPath(path);
}

/**
 * Returns the text before the first ':' of the given IRI, or the empty string if the IRI is
 * null or contains no ':' at all.
 */
static String getScheme(String iri) {
    var colon = (iri == null) ? -1 : iri.indexOf(':');
    return (colon == -1) ? "" : iri.substring(0, colon);
}

/**
 * Returns the text after the first ':' of the given IRI, or the empty string if the IRI is
 * null or contains no ':' at all.
 */
static String getSchemeSpecificPart(String iri) {
    var colon = (iri == null) ? -1 : iri.indexOf(':');
    return (colon == -1) ? "" : iri.substring(colon + 1);
}

/**
* Get the "path"-like component of any URI. Similar to {@link URI#getPath()}, but also works
* e.g. for "non-standard" relative "file:hello.txt" URIs, and correctly chops off query
Expand All @@ -140,8 +235,16 @@ public static String getPath(URI uri) {
return chopFragmentAndQuery(uri.getSchemeSpecificPart());
}

/**
 * Get the "path"-like component of a URI given as a String: its scheme specific part, with
 * the fragment and query chopped off.
 */
public static String getPath(String uri) {
    var schemeSpecificPart = getSchemeSpecificPart(uri);
    return chopFragmentAndQuery(schemeSpecificPart);
}

private static String chopFragmentAndQuery(String ssp) {
var chop = ssp.indexOf('?', 0);

// Handle Glob URIs, like e.g. "file:/tmp//?.txt"
if (ssp.indexOf('=', chop) == -1) chop = -1;

if (chop == -1) chop = ssp.indexOf('#', 0);
if (chop == -1) chop = ssp.length();

Expand Down
54 changes: 53 additions & 1 deletion java/dev/enola/common/io/iri/URIsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import com.google.common.collect.ImmutableMap;
import com.google.common.io.Resources;
import com.google.common.net.MediaType;
import com.google.common.truth.Truth8;

import dev.enola.common.io.iri.URIs.MediaTypeAndOrCharset;

Expand All @@ -31,12 +32,13 @@
import java.io.File;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.Path;

public class URIsTest {

@Test
public void testGetQueryMap() throws URISyntaxException {
assertThat(URIs.getQueryMap(null)).isEmpty();
assertThat(URIs.getQueryMap((URI) null)).isEmpty();
assertThat(URIs.getQueryMap(URI.create(""))).isEmpty();
assertThat(URIs.getQueryMap(URI.create("http://www.vorburger.ch"))).isEmpty();
assertThat(URIs.getQueryMap(URI.create("http://google.com?q=michi#fragment")))
Expand All @@ -49,6 +51,11 @@ public void testGetQueryMap() throws URISyntaxException {
.containsExactly("charset", "ASCII");
assertThat(URIs.getQueryMap(URI.create("scheme:thing?ping=pong=pang#fragment")))
.containsExactly("ping", "pong=pang");

// Glob URIs
assertThat(URIs.getQueryMap("file:/tmp//?.txt")).isEmpty();
assertThat(URIs.getQueryMap("file:/tmp//?q=.")).containsExactly("q", ".");
// TODO assertThat(URIs.getQueryMap(URI.create("file:/tmp//?.txt"))).isEmpty();
}

@Test
Expand Down Expand Up @@ -97,6 +104,19 @@ public void testAddQuery() {
.isEqualTo("http://host/path?arg1=a&arg2=b&arg3=c");
}

@Test
public void testAddQueryGivenOriginalUriWithQuery() {
    var withoutQueryX = URI.create("http://host/pathX");
    var withQueryY = URI.create("http://host/pathY?arg1=a");
    var withQueryZ = URI.create("http://host/pathZ?arg2=b");
    var withoutQueryZ = URI.create("http://host/pathZ");

    // No query on the 1st argument: it gets the query of the 2nd argument
    var expected = URI.create("http://host/pathX?arg1=a");
    assertThat(URIs.addQuery(withoutQueryX, withQueryY)).isEqualTo(expected);

    // 1st argument already has a query: it is returned as-is
    assertThat(URIs.addQuery(withQueryY, withQueryZ)).isEqualTo(withQueryY);

    // 2nd argument has no query: the 1st argument is returned as-is
    assertThat(URIs.addQuery(withoutQueryX, withoutQueryZ)).isEqualTo(withoutQueryX);
    assertThat(URIs.addQuery(withQueryY, withoutQueryZ)).isEqualTo(withQueryY);
}

@Test
public void testGetFilename() throws URISyntaxException {
// Files
Expand Down Expand Up @@ -187,5 +207,37 @@ public void testGetPath() {
assertThat(URIs.getPath(URI.create("whatever:/place/something.test")))
.isEqualTo("/place/something.test");
assertThat(URIs.getPath(URI.create("whatever:something.test"))).isEqualTo("something.test");

// Glob URIs
assertThat(URIs.getPath("file:/tmp//?.txt")).isEqualTo("/tmp//?.txt");
assertThat(URIs.getPath("file:/tmp//{xy}?q=.")).isEqualTo("/tmp//{xy}");
}

@Test
public void testGetFilePath() {
    // The trailing slash is not part of the resulting Path
    Truth8.assertThat(URIs.getFilePath("file:/tmp/")).isEqualTo(Path.of("/tmp"));

    // Glob URIs: A '?' without '=' is a wildcard which stays in the Path, whereas a real
    // ?query is chopped off; the double slash collapses into a single one.
    Truth8.assertThat(URIs.getFilePath("file:/tmp//?.txt")).isEqualTo(Path.of("/tmp/?.txt"));
    Truth8.assertThat(URIs.getFilePath("file:/tmp//{xy}?q=.")).isEqualTo(Path.of("/tmp/{xy}"));
}

@Test
public void testGetScheme() {
    // Text before the first ':' is the scheme
    var scheme = URIs.getScheme("test:something");
    assertThat(scheme).isEqualTo("test");

    // Without any ':' (or for null) there is no scheme
    var relative = URIs.getScheme("rela/tive");
    assertThat(relative).isEqualTo("");
    var absolute = URIs.getScheme("/absolute/rela/tive");
    assertThat(absolute).isEqualTo("");
    var ofNull = URIs.getScheme(null);
    assertThat(ofNull).isEqualTo("");
}

@Test
public void testGetSchemeSpecificPart() {
    // Text after the first ':' is the scheme specific part
    var part = URIs.getSchemeSpecificPart("test:something");
    assertThat(part).isEqualTo("something");

    // Without any ':' (or for null) the result is the empty string
    var relative = URIs.getSchemeSpecificPart("rela/tive");
    assertThat(relative).isEqualTo("");
    var absolute = URIs.getSchemeSpecificPart("/absolute/rela/tive");
    assertThat(absolute).isEqualTo("");
    var ofNull = URIs.getSchemeSpecificPart(null);
    assertThat(ofNull).isEqualTo("");
}
}
32 changes: 4 additions & 28 deletions java/dev/enola/common/io/resource/FileResource.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.*;
import java.time.Instant;
import java.util.Arrays;
Expand Down Expand Up @@ -67,43 +66,20 @@ public Resource getResource(URI uri) {
private final Path path;
private final OpenOption[] openOptions;

private static Path pathFromURI(URI uri) {
// TODO Replace this with return Path.of(uri); but it needs more work...
// Both for relative file URIs and query parameters and ZIP files.
// Nota bene: https://stackoverflow.com/q/25032716/421602
// https://docs.oracle.com/javase/7/docs/technotes/guides/io/fsp/zipfilesystemprovider.html
// https://docs.oracle.com/en/java/javase/21/docs/api/jdk.zipfs/module-summary.html

var path = URIs.getPath(uri);
var scheme = uri.getScheme();
if ("file".equals(scheme)) {
return FileSystems.getDefault().getPath(path);
} else
try {
URI fsURI = new URI(scheme, uri.getAuthority(), "", null, null);
var fs = FileSystems.getFileSystem(fsURI);
return fs.getPath(path);
} catch (URISyntaxException e) {
// This is rather unexpected...
throw new IllegalStateException(
"Failed to create FileSystem Authority URI: " + uri.toString(), e);
}
}

public FileResource(URI uri, MediaType mediaType, OpenOption... openOptions) {
super(uri, mediaType);
this.path = pathFromURI(uri);
this.path = URIs.getFilePath(uri);
this.openOptions = safe(openOptions);
}

public FileResource(URI uri, OpenOption... openOptions) {
super(uri, MoreFiles.asByteSource(pathFromURI(uri), openOptions));
this.path = pathFromURI(uri);
super(uri, MoreFiles.asByteSource(URIs.getFilePath(uri), openOptions));
this.path = URIs.getFilePath(uri);
this.openOptions = safe(openOptions);
}

private static OpenOption[] safe(OpenOption[] openOptions) {
if (openOptions.length == 0) return EMPTY_OPTIONS;
if (openOptions.length == 0) return EMPTY_OPTIONS; // skipcq: JAVA-S1049
else return Arrays.copyOf(openOptions, openOptions.length);
}

Expand Down
8 changes: 8 additions & 0 deletions java/dev/enola/common/io/resource/ResourceProvider.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,18 @@ public interface ResourceProvider extends ProviderFromIRI<Resource> {
return getResource(uri);
}

/**
 * Variant of getReadableResource which takes the IRI as a String; it simply returns
 * whatever {@code get(iri)} returns (which may be null).
 */
default @Nullable ReadableResource getReadableResource(String iri) {
return get(iri);
}

default @Nullable WritableResource getWritableResource(URI uri) {
return getResource(uri);
}

/**
 * Variant of getWritableResource which takes the IRI as a String; it simply returns
 * whatever {@code get(iri)} returns (which may be null).
 */
default @Nullable WritableResource getWritableResource(String iri) {
return get(iri);
}

// -------------------------------------------

default @Nullable ReadableResource getReadableResource(URL url) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ static Stream<Path> walk(Path globPath) throws IOException {
if (starPos > 0) basePath = Path.of(globString.substring(0, starPos - 1));
else basePath = new File(".").toPath();

// TODO Use URI instead of Path, and FileResource's#pathFromURI() !!!
// Inspired by File.newDirectoryStream(), but matching full path, not just getFileName()
var fs = basePath.getFileSystem();
var matcher = fs.getPathMatcher("glob:" + globString);
Expand Down
Loading

0 comments on commit 0502e39

Please sign in to comment.