F2F Followup: Magic Window (and friends) Proposal #179
Quick note, mostly so I don't forget: After looking at some other APIs it seems like
This looks quite good. I need to look this over a few more times.
It's a bit rough and admittedly perhaps a bit too verbose, but I wanted to see if anyone sees anything worth including/excluding here:

partial interface Navigator {
  [SameObject] readonly attribute VR? vr;
};

// See Device Capabilities API for inspiration:
// http://device-capabilities.sysapps.org/
[NoInterfaceObject]
interface VRDisplayCapabilities {
  readonly attribute boolean availPosition;
  readonly attribute boolean availOrientation;
  readonly attribute boolean availExternalDisplay;
  readonly attribute boolean present;
  readonly attribute double displayDPI;
  readonly attribute float pixelDepth;
  readonly attribute unsigned long maxLayers;
};

dictionary VRDisplayFilter {
  unsigned long displayId;
  DOMString displayName;
  DOMString displayNamePrefix;
  VRDisplayCapabilities capabilities;
};

dictionary VRRequestDisplayOptions {
  required sequence<VRDisplayFilter> filters;
  required VRRequestType requestType;
};

interface VR : EventTarget {
  [SecureContext] Promise<boolean> getAvailability();
  [SecureContext] attribute EventHandler onavailabilitychanged;
  // [SecureContext, SameObject] readonly attribute VRDisplay? referringDevice;
  [SecureContext, SameObject] readonly attribute sequence<VRDisplay>? referringSessions;
  [SecureContext] Promise<sequence<VRDisplay>> getDisplays();
  Promise<VRDisplay> requestDisplay(optional VRRequestDisplayOptions options);
};
VR implements VREventHandlers;

[NoInterfaceObject]
interface VREventHandlers {
  attribute EventHandler ondisplayconnect;
  attribute EventHandler ondisplaydisconnect;
  attribute EventHandler ondisplaymount;
  attribute EventHandler ondisplayunmount;
  attribute EventHandler ondisplayvisibilitychange;
  attribute EventHandler ondisplayblur;
  attribute EventHandler ondisplayfocus;
  attribute EventHandler ondisplaypresentchange;
  attribute EventHandler ondisplaypresentbegin;
  attribute EventHandler ondisplaypresentend;
};

/*
// Permissions API integration
//
// In addition to `navigator.vr.requestDisplay()`, sites can use this API:
navigator.permissions.request({
  name: 'vr',
  filters: [{
    displayNamePrefix: 'Oculus Rift'
  }]
}).then(result => {
  if (result.displays.length > 0) {
    return result.displays[0];
  } else {
    throw new DOMException('Chooser canceled', 'NotFoundError');
  }
}).then(display => {
  window.sessionStorage.vrLastDisplay = display.id;
});
*/

// For Permissions API.
dictionary AllowedVRDisplay {
  DOMString? manufacturerName;
  DOMString? productName;
  DOMString? displayName;
  required boolean mayUseMagicWindow;
  // An `allowedDisplays` of `"all"` means all displays are allowed.
  required DOMString allowedDisplays;
};

dictionary VRPermissionDescriptor : PermissionDescriptor {
  DOMString displayId;
  sequence<VRDisplayFilter> filters;
  boolean acceptAllDisplays = true;
};

dictionary VRPermissionData {
  sequence<AllowedVRDisplay> allowedDevices = [];
};

dictionary VRSessionEventInit : EventInit {
  required VRDisplay display;
};

[Constructor(DOMString type, VRSessionEventInit eventInitDict)]
interface VRSessionEvent : Event {
  [SameObject] readonly attribute VRDisplay display;
};

enum VRRequestType {
  "standard",
  "stereo"
};

enum VRDisplayState {
  "mounted",
  "unmounted",
  "pending"
};

enum VRSessionState {
  "connecting",
  "connected",
  "closed",
  "terminated"
};

enum VRSessionVisibilityState {
  "hidden",
  "visible",
  "prerender",
  "unloaded"
};

interface VRAccess : EventTarget {
  readonly attribute VRDisplayMap displays;
  attribute EventHandler onstatechange;
};

interface VRDisplay : EventTarget {
  readonly attribute DOMString? id;
  readonly attribute DOMString? manufacturerName;
  readonly attribute DOMString? productName;
  readonly attribute DOMString? displayName;
  readonly attribute VRDisplayState state;
  attribute EventHandler onstatechange;
  // readonly attribute boolean connected;
  // readonly attribute boolean mounted;
  // readonly attribute boolean focused;
  Promise<void> focus();
  Promise<void> blur();
  readonly attribute VRSession session;
  readonly attribute boolean hidden;
  Promise<void> requestPresent(VRRequestType type);
  Promise<void> exitPresent();
};

interface VRSession : EventTarget {
  readonly attribute DOMString? id;
  readonly attribute VRDisplay display;
  readonly attribute VRRequestType type;  // "standard", "stereo"
  // à la http://device-capabilities.sysapps.org/#systemdisplay-interface
  readonly attribute boolean external;
  readonly attribute boolean primary;
  readonly attribute VRSessionVisibilityState visibilityState;
  attribute EventHandler onvisibilitychange;
  readonly attribute VRSessionState state;
  attribute EventHandler onstatechange;
  Promise<void> focus();
  Promise<void> blur();
  Promise<void> open();  // Opened by default when created.
  Promise<void> close();
  // This is tied up in the discussion of #149 and #178
  boolean availFrameOfReference(VRFrameOfReferenceType type);
  Promise<VRFrameData>? getFrameData(VRFrameData frameData);
  /**
   * Return a VRPose containing the future predicted pose of the VRDisplay
   * when the current frame will be presented. The value returned will not
   * change until JavaScript has returned control to the browser.
   *
   * The VRPose will contain the position, orientation, velocity,
   * and acceleration of each of these properties.
   */
  [NewObject] VRPose getPose();
  attribute EventHandler onmessage;
  void sendFrame();  // Replaces `VRDisplay#submitFrame`
};
VRDisplay implements VRDisplayEventHandlers;

// A presentation context that only returns pose data
interface VRPosePresentationContext : VRPresentationContext {
  // Not sure what else would be useful here.
};

// A presentation context that can render to the HMD
interface VRStereoSession : VRSession {
  attribute double depthNear;
  attribute double depthFar;
  // Layer interface hasn't been decided on yet, so just presume that this is a
  // thing that you set the layers with.
  attribute VRLayerList layers;
  unsigned long requestVRFrame(FrameRequestCallback callback);
  void cancelVRFrame(unsigned long handle);
  VRCanvasSize getCanvasSize(optional float scale);
};

// See Device Capabilities API for inspiration:
// http://device-capabilities.sysapps.org/#systemdisplay-interface
[NoInterfaceObject]
interface VRCanvasSize {
  readonly attribute float pixelDepth;
  readonly attribute unsigned long width;
  readonly attribute unsigned long height;
};
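For orientation, a quick sketch of how a page might exercise this draft IDL; this is purely illustrative, and nothing here is settled:

// Illustrative usage of the draft `navigator.vr` entry point above.
navigator.vr.getAvailability().then(available => {
  console.log('VR hardware available?', available);
});

navigator.vr.ondisplayconnect = evt => {
  console.log('Display connected:', evt.display.displayName);
};

navigator.vr.requestDisplay({
  filters: [{displayNamePrefix: 'Oculus'}],
  requestType: 'stereo'
}).then(display => display.requestPresent('stereo'))
  .catch(err => console.warn('No matching display:', err));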
Minor comment, haven't read your full IDL yet @cvan: I actually didn't mean to include
Had some more time to look over your IDL. Definitely some bits I like in there, though for the sake of clarity I'm going to suggest that many things like the filter and permissions be broken out into their own proposals (I do like how your

One piece of advice I've received recently is: "Look at your feature set and assume you need to drop 50% of it. Find the subset of functionality you simply can't launch without, and focus on launching that. Increase complexity afterwards." I think that's worth taking to heart, so I think I'm going to start throwing out a fair amount of "Yes, but later" in my spec responses. Nothing personal, just trying to control scope and get to a successful v1.

I want to post a revised IDL that pulls in some of your feedback while trying to focus on the feature in question, but holy crap it's later than I thought and I have more Khronos meetings in the morning, so for now you get some general notes/questions:
I really like the idea of focusing on the very basics, things that are a MUST have to launch, and then building on top of them. I think the changes proposed in this thread are getting a bit too complex, both for the developer/end user and for the vendors. I think it would be more important/useful to understand the motivation/use cases for these changes and build an API that is as simple as possible to fulfill them. I might be missing something in these motivations, but correct anything you may find:

The main motivation for this proposal is that some VR-ready devices, especially mobile devices like phones, allow 2 modes of presentation: HMD stereoscopic mode and what has been called "magic window" (a term that I like but think is too vague) or fullscreen monoscopic mode. AR devices like Tango-enabled phones/tablets are also in need of such a fullscreen monoscopic mode.

There is another related motivation: some devices might allow both 3DOF and 6DOF tracking, and it might be important, for the sake of the best result, to allow selecting one or the other, as the underlying technologies might be pretty different. As an example, the announced ASUS ZenFone AR phone will be both Tango and Daydream ready. It might provide 6DOF tracking in "tango mode" === fullscreen monoscopic for the most part, but the 3DOF Daydream experience might be much better due to the timewarp capabilities. This might be an oversimplification of the scenario, but I think it fits well with the current status of the needs in the space from some of the most notorious devices out there.

As you can imagine, an implementation that supports all of these scenarios is quite a browser-vendor nightmare. So there are 2 points of view: the developer needs a clean and as-easy-as-possible API that is powerful enough to cover the most basic use cases, and the implementer needs a way to get all the technologies together while implementing the API. There will be a clash for sure between these 2 perspectives. My proposal would be to think of the simplest possible way to resolve the situation. In order to do so, what I see is 2 main elements:

1.- Expose the capabilities of the VRDisplay: I know we are trying to get away from the VRCapabilities route, but I also think it is interesting to have some kind of structure that exposes the features available in the display, as they can be pretty different: hasPosition, allowsMonoPresentation (naming is not definitive) could resolve the cases mentioned above, as I cannot imagine a device that does not allow at least stereoscopic presentation and orientation.

2.- Allow the developer to specify what mode of presentation he/she wants to use: at requestPresent, a set of the properties that the app needs/wants is passed so the user agent knows what kind of technology to use underneath: { usePosition: true } might resolve all the situations above (see the sketch after this comment). Mono or stereo can be resolved in the rendering phase with the information from the VRPose inside the VRFrameData structure. Of course, a projection matrix might be needed.

DRAWBACKS: You might see more drawbacks than me :) but the way I see it, the biggest issue might be how to change from one mode to another in the same application. The way I see it, stopping presentation and requesting to present again could be the way to go. It might have some user-generated-event problems, but at least for now, the only scenario would be changing from mono to stereo and/or from 3DOF to 6DOF, and these changes might, effectively, need user interaction. Imagine a Tango device changing from fullscreen monoscopic to HMD stereoscopic (which needs user interaction to place the device in a headset), for example.

I know it is drifting away from the well-designed, more complex, and for sure more future-ready approaches commented on in the previous proposals, but I think a simpler approach could be better. Please let me know if there are use cases that lie outside this proposal.
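A minimal sketch of the two elements above, assuming the WebVR 1.1 entry points and treating the capability option name (`usePosition`) as a placeholder from this comment rather than specced API:

// Sketch of the proposal above. `usePosition` is the illustrative option
// name from the comment; the second argument to requestPresent() is not
// part of any current spec. `canvas` is assumed to exist on the page.
navigator.getVRDisplays().then(displays => {
  const display = displays[0];
  if (!display) return;

  // 1. Read the exposed capabilities (hasPosition is real WebVR 1.1;
  //    allowsMonoPresentation would be the hypothetical addition).
  const use6DOF = display.capabilities.hasPosition;

  // 2. Declare the properties the app wants to use at requestPresent time,
  //    so the UA can pick the right technology underneath (e.g., Tango for
  //    6DOF, Daydream for 3DOF on a device that supports both).
  return display.requestPresent([{source: canvas}], {usePosition: use6DOF});
}).catch(err => console.warn('Could not start presenting:', err));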
Forgive me for the massive IDL brainstorming I dropped in the comment above. I think you can see I'm also a bit curious about potential overlap with the Presentation API (see this post) and the Google Cast Web SDK (a simple JS content library for Chrome on Android, iOS, etc.). The Presentation API has the concept of sessions that can be started from a URL, like so:

// Per the implementation in Chrome for Android and desktop
var request = new PresentationRequest([
  'https://example.com/shadows_and_fog.html'
]);
request.getAvailability().then(availability => {
  console.log('Has external display to present to?', availability.value); // This would be `false` in the case of desktop (e.g., today, that's desktop Chrome vs. Chrome for Android).
  availability.addEventListener('change', evt => {
    console.log('Has external display to present to?', availability.value);
  });
});

And there are some examples in the spec and another good example here.
This has been brought up in a few specs and docs:
I think this is where we could potentially extend the Media Capabilities API to understand VR display capabilities (and configurations), as either

navigator.mediaCapabilities.query({
  type: 'VRSource',
  leftBounds: [0.0, 0.0, 0.5, 1.0],
  rightBounds: [0.5, 0.0, 0.5, 1.0],
  display: { // Any booleans currently hanging off `VRDisplayCapabilities` (https://w3c.github.io/webvr/#interface-vrdisplaycapabilities) would be able to be queried.
    hasOrientation: true
  },
  video: {
    type: 'video/webm codec=vp9.0',
    dimension: '360-lr', // Totally optional. Inspiration here is Samsung's proprietary `<video>` extensions for Samsung Internet Browser for Gear VR: https://pres.webvr.io/video.html
    width: 1920,
    height: 1080,
    num_inputs: 1
  },
  display_mode: 'horz_side_by_side' // Inspiration: https://github.com/OSVR/OSVR-JSON-Schemas/blob/master/doc/display_descriptor_schema_v1/Structure%20of%20JSON%20descriptor%20file%20for%20HMDs.md
}).then(result => {
  console.log(result.isSupported); // `true`
  console.log(result.hasPosition); // `false`
  // See https://github.com/WICG/media-capabilities/blob/master/explainer.md#examples for similar example usage.
}).catch(console.error.bind(console));

or something like

console.log(window.screen.hasExternalDisplay);
Web MIDI allows this in the form of a method invoked like this:

navigator.requestMIDIAccess({sysex: true, software: true}).then(access => {
  access.inputs.forEach(input => {
    console.log('ID: %s; Name: %s; Manufacturer: %s; Version: %s',
                input.id, input.name, input.manufacturer, input.version);
  });
}, err => {
  console.warn('Failed to get MIDI access', err);
});

And Web Bluetooth also has a similar API with similar usage. Bluetooth and USB are quite complicated APIs, so I've rewritten the API example from the spec for (albeit barely) improved clarity:

navigator.bluetooth.requestDevice({
  filters: [
    {
      services: [
        'heart_rate'
      ]
    }
  ]
})
.then(device => device.gatt.connect())
.then(server => server.getPrimaryService('heart_rate'))
.then(service => service.getCharacteristic('heart_rate_measurement'))
.then(characteristic => characteristic.startNotifications())
.then(char => {
  console.log('Characteristic', char, 'value: %s', char.readValue());
  char.addEventListener('characteristicvaluechanged', evt => {
    console.log('Characteristic', evt.target, 'changed: %s', evt.target.value);
  });
}).catch(console.error.bind(console));

I'm definitely not suggesting going that overboard with the WebVR API. So, for WebVR, based on your comments above and my WIP IDL above, I think something like this would be nice:

function present(display) {
  return display.requestPresent([{source: canvas}]);
}

navigator.vr.requestDisplay({mode: 'stereo', position: true}).then(present, err => {
  console.warn('Could not find a matching display.');
  return navigator.vr.getDisplays().then(displays => {
    if (!displays.length) {
      return Promise.resolve(null);
    }
    return present(displays[0]);
  });
}).catch(console.warn.bind(console));

Also, see #182 for a related nit-y suggestion.
Some comments on @toji's proposal. (I haven't had a chance to parse @cvan's IDL yet, sorry.)
Been going over a lot of the feedback and trying to simplify my own proposal (simplify, simplify, simplify!) and wanted to present my latest pass at it. Rough IDL:
Example Usage:
Comments: It also smooshes the different Session types into a single one for simplicity, though it does raise some questions about usage. For instance:

On that note, I'm also still not sure what non-presenting mode should do about projection matrices, since they'll need to know a canvas width and height to accurately provide one. You could attach a Canvas to the layers and use that implicitly, but that carries with it a lot of implications about how we're using the canvas that probably aren't true in non-presenting mode. There's a similar issue with the view matrices, but realistically both eyes could just report the same matrix as the pose and it would work out fine.

Worth pointing out that I've moved

Naming has changed a bit. It started feeling weird to call
I like your proposal, @cvan, but I have some comments :). The way I see it, a display can support both monocular and binocular modes (any mobile phone/tablet, for example), and the difference should be noticed either when requesting presentation or simply by using the data from the VRFrameData as desired (to render 1 or 2 eyes). We may assume that orientation is always provided (and desired), but position is only available on some devices.

I see requestDisplay as a "nice to have" feature to filter, but I still think it is important to specify the capabilities you want to use in your experience in the requestPresent phase, as a device could provide positional tracking that you do not want to use (a 3DOF experience on a Tango + Daydream enabled device, for example === ASUS ZenFone AR).

Regarding requestDisplay: a) Could it be possible that more than one display in the system fulfills the same criteria? Then an array of displays would make more sense.

I will provide some possible scenarios and resolutions (using getDisplays with a filter for ease of use; see the sketch after this list):

1.- Binocular rendering with only orientation (Cardboard, Gear VR, Daydream).
2.- Binocular with position (Vive, Rift, PSVR, Tango VR).
3.- Monocular with position and a see-through camera (Tango AR).
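A sketch of those three scenarios, assuming a hypothetical filter argument to getDisplays() as suggested above; none of these option names are specced:

// Hypothetical filters, matching the numbered scenarios above.

// 1.- Binocular rendering with only orientation (Cardboard, Gear VR, Daydream).
navigator.vr.getDisplays({mode: 'binocular', orientation: true});

// 2.- Binocular with position (Vive, Rift, PSVR, Tango VR).
navigator.vr.getDisplays({mode: 'binocular', orientation: true, position: true});

// 3.- Monocular with position and a see-through camera (Tango AR).
navigator.vr.getDisplays({mode: 'monocular', position: true, seeThroughCamera: true});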
I would also dare to say that we could assume some "default" modes of operation for a device. This is tricky, which is why I only dare to mention it, but I also assume that the API could live without it. I mean, if no parameters are passed to the requestPresent call, a default mode is assumed:

A.- Vive, Rift, PSVR: {mode: "binocular", orientation: true, position: true}

Tango would be included in B even though it may sound like it should be otherwise.
I like the idea of passing display capabilities. A couple comments:
Thank you very much for putting this together, @toji. I am favoring your simplified proposal around sessions. A comment about canvas size:
That's a very good suggestion! I like the intent behind "outputSize", but then I question if it's actually "inputSize", since it's the intended size of the render target being passed into the VR compositor. "displaySize" sounds like it describes the physical display properties, and we don't want people to think this is the actual screen resolution. "sourceProperties" is less ambiguous, so I like that. "renderTargetProperties" similarly sounds nice and explicit, though it's more verbose.
This is a great proposal, @toji! A few comments and questions:
Overall, this is awesome. Thanks for putting this together!
Thanks for the comments, Nell! A few quick responses:
To be honest I'm not completely sold on it yet, but I do agree that as much as possible it would be good to carry these patterns throughout the API.
Yes, that's my intent.
Haven't made any updated proposals for mirroring yet. For the moment I'm assuming it would work the same way that it does in 1.1, but I do feel it could be improved.
There's a couple of ways this could go, and I'm not sure if there's a particular advantage to one over the other; I would appreciate some further thoughts! If we put an event on the session, it would have to simply be an 'onend' event. That's probably sufficient, since I don't know that you need an event for the session starting. (It's always requested by the app.)
Yes! Absolutely. Spec language should very explicitly tie the user gesture requirement to a presenting session.
Agreed. I don't mind discussing it, but I think it's a better conversation to have in a separate issue rather than trying to squeeze it in here. Definitely worth reiterating that accepting a proposal like this one isn't carving it in stone; it's just setting up the basic API shape, and details will continue to be hammered out afterwards.
@speigg Good comments. The seeThroughCamera concept is still blurry, as there is research on leveraging other web APIs in order to be able to use the seeThroughCamera (that is actually a thing) and on how a see-through device like HoloLens could also fit. Thank you. I agree with @NellWaliczek that this should be discussed elsewhere, but it was being used as an example of use cases of monocular/binocular (Tango) and how the API could operate (with layers), as it was a subject of the discussion. I really believe we should be guided by use cases more than assumptions/future vision of the space. I am completely OK not mentioning it again anyway.

On the binocular/monocular vs. magic window naming, the biggest problem that was mentioned is that magic window is an abstract concept that needs explanation, and it can also be mistaken for a virtual magic window concept (having windows to virtual worlds inside VR), whereas stereo/mono or binocular/monocular is explicit and clear. IMHO, being an HMD like Glass would not make a difference, as it would still be monocular/fullscreen, right? I might be missing something.
Breaking out the onsessionchange comment into a separate reply...
That's a clarification I hadn't realized I was looking for! Is your expectation that requesting a new session would automatically terminate a previous session, or that requests to create new sessions be rejected until the previous session is ended? If the latter, then I agree that an 'onend' event on the VRSession is better. If the former, then we should leave the 'onsessionchange' event on VRDisplay with the parameters for the old/new VRSessions in the event args.
@judax I don't want to presume to speak for @toji's intentions, but my hunch is that the difference is that 'presentation' mode encapsulates a number of related behaviors:
@judax Presenting to an HMD, whether monocular or binocular, warrants different design decisions compared to a handheld Magic Window display. In other words, monocular vs. binocular really doesn't matter so much when it comes to deciding how to size and lay out content and UI, but the fact that the user has the display in front of their face vs. in their hands is a big deal. If "magic mirror" or "magic lens" is confusing as a mode, perhaps "handheld" vs. "head-mounted" makes more sense?
Yes, @NellWaliczek, but I do not see why the "simplified" proposal would not resolve the same scenarios:
Monocular ("magic window") in fullscreen would require use interaction AFAIK.
VRPose includes one position and orientation apart from the ones for each eye. Adding a projection matrix too would solve this. AFAIK, @toji's proposal also considers this necessary.
I think this applies too.
I might be missing something, but this is an implementation detail and it would depend on the specific technology underneath. In DD, googlevr could be used for the monocular/binocular implementation on DD-ready phones for 3DOF. For 6DOF/AR, Tango and the "regular" rendering pipeline could be used. Of course, I only talk about the tech I know :). Please do not hesitate to provide use cases where this is not applicable.
I've been working under the assumption (which needs to be put into text) that beginning a new session terminates the previously active one. An exception would be if you requested sessions with the same capabilities twice, in which case the previously requested session would be returned again. (See the sketch after this comment.)
Yes, but at that point the user gesture would be required by the
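A sketch of the session-replacement semantics described above, with `requestSession` and its options standing in for whatever the final API shape becomes:

// Illustrative only; `requestSession` and `exclusive` are placeholders.
const sessionA = await display.requestSession({exclusive: true});

// Same capabilities requested again: the existing session is returned.
const sessionB = await display.requestSession({exclusive: true});
console.assert(sessionA === sessionB);

// Different capabilities: the previously active session is terminated
// and replaced by the new one.
const sessionC = await display.requestSession({exclusive: false});
// sessionA/sessionB have now ended.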
@judax That's an interesting point about the projection matrices, if I'm understanding you correctly. I'd originally been operating under the assumption that the page developer could pick whatever projection they wanted in the magic window case, but that's really not true for Tango, is it?
One minor tweak to this proposal: I've started to prefer

Thoughts? Anyone have an allergic reaction to that idea?
I'm slightly concerned that non-exclusive is too permissive for magic window mode. I feel that high-fidelity device orientation and potentially other input for magic window should be tied to input focus, so that we don't have backgrounded pages/ads/etc. trying to figure out what you're looking at in VR, and input focus is exclusive.
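For illustration, the page-visible effect of tying magic-window poses to input focus might look like the following sketch, using the WebVR 1.1 getFrameData() shape; the actual enforcement would live in the UA, not the page:

// Sketch: a backgrounded/unfocused page simply stops receiving poses.
const frameData = new VRFrameData();

function onFrame() {
  // Under the suggestion above, getFrameData() would return false (or stale
  // data) whenever the page lacks input focus, so backgrounded pages/ads
  // can't track what the user is looking at.
  if (document.hasFocus() && display.getFrameData(frameData)) {
    drawMagicWindowView(frameData); // hypothetical page render helper
  }
  window.requestAnimationFrame(onFrame);
}
window.requestAnimationFrame(onFrame);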
Most of this has been incorporated into the explainer now, and further magic window discussion is happening on #237. Closing this issue.
Support for polling device poses when not presenting, for the purposes of rendering a tracked view to the DOM (a.k.a. magic window), has been a bit of a contentious feature of the API thus far. Some vendors see the feature as critical, and others can't support it at all due to platform constraints. In its current form, the capability to get frame data at any time is implied, and writing up spec language to describe when it will and won't work has been challenging. This proposal attempts to make this mode a clearly defined feature (rather than an awkward side effect) that can be implemented validly in a variety of different ways, or not at all. In doing so, it also attempts to solve a smattering of other issues that have come up with the spec.
Issues being addressed:

- When `VRDisplay.rAF` should work when not presenting, and when the pose should update.
- Enumerating `VRDisplay`s without spinning up the tracking services at all, so that pages can ask for available hardware in a lightweight way.

Overview:
I'm proposing that we move much of the existing functionality out of `VRDisplay` and into a new object: `VRPresentationContext`. This new object is produced when a `requestPresent` call successfully resolves, and is valid only when presenting.

To handle the different needs of magic window vs. HMD use, different types of presentation contexts can be requested, each of which contains functionality specifically for that use case. For example, the `VRPosePresentationContext` (for Magic Window mode) only provides FrameData. Because you have to call `requestPresent` to get that context, though, Magic Window mode becomes much easier for implementations to tightly control and functions much more like standard HMD presentation.

This also leaves the door open for new types of presentation contexts to be added in the future if we see the need! For example: Justin's "Actual Magic Window" concept from the F2F could be surfaced as a new context type.

The `VRDisplay` can choose to support any combination of presentation contexts, so having a platform that doesn't support magic window is no longer an issue: `requestPresent` simply rejects if you ask for that type of context. Additionally, the presentation contexts for a single display could easily be provided by different backends and expose different capabilities. For example: with the Asus Zenphone mentioned above, which supports Tango and Daydream but not simultaneously, the `VRPosePresentationContext` could be backed by the Tango API and provide 6DoF tracking, but the `VRStereoPresentationContext` would be backed by the Daydream API and only allow 3DoF. Beyond that, if the user requested a 3DoF Frame of Reference with the `VRPosePresentationContext`, the implementation could fall back to only using device orientation events to save battery.

By moving functionality like querying the recommended canvas size onto the presentation contexts, we also avoid issues where the platform may not be in a good state to report those values until it's actually presenting. It also means that the amount of information that's needed to enumerate the devices is dramatically lower, which should allow pages to poll for connected devices without spinning up the associated tracking services (platform support pending, of course).
Incidentally this approach also makes drive-by fingerprinting less of an issue, since a good deal of the information provided by the API has been relocated to a place that requires a user gesture to get to.
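To make the proposed flow concrete, here is a minimal sketch of requesting a pose-only context; the interface names come from the overview above, but the argument shape and method signatures are guesses, not the rough IDL from this post:

// A sketch only: `requestPresent` resolving to a presentation context, per
// the overview above. The option names here are assumptions.
display.requestPresent({type: 'pose'}).then(poseContext => {
  // `poseContext` would be a VRPosePresentationContext: pose/frame data
  // only, valid for as long as this presentation lasts.
  const frameData = new VRFrameData(); // WebVR 1.1 type, reused for the sketch
  function onFrame() {
    poseContext.getFrameData(frameData);
    drawTrackedDOMView(frameData); // hypothetical page-defined render helper
    window.requestAnimationFrame(onFrame);
  }
  window.requestAnimationFrame(onFrame);
}, err => {
  // Platforms that can't support magic window at all would simply reject.
  console.warn('Pose presentation context unavailable:', err);
});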
Rough IDL:
Example usage:
Outstanding questions: