Merge pull request #11 from creativeIKEP/develop
v1.2.0
creativeIKEP authored Feb 15, 2022
2 parents 88f45dd + 426c279 commit be6db72
Showing 20 changed files with 268 additions and 295 deletions.
3 changes: 0 additions & 3 deletions Assets/Images/Dance.mp4

This file was deleted.

3 changes: 0 additions & 3 deletions Assets/Images/Fitness.mp4

This file was deleted.

3 changes: 3 additions & 0 deletions Assets/Images/SignLanguage.mp4
Git LFS file not shown


3 changes: 3 additions & 0 deletions Assets/Images/jump.mp4
Git LFS file not shown


2 changes: 1 addition & 1 deletion Assets/Scenes/2DSampleScene.unity
@@ -178,7 +178,7 @@ VideoPlayer:
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 814657130}
m_Enabled: 1
m_VideoClip: {fileID: 32900000, guid: 6f19fa9b6a47cd643bcd9dfde6e6e2c5, type: 3}
m_VideoClip: {fileID: 32900000, guid: 750a2e5ba4c61744eafb92fd15b2b8b8, type: 3}
m_TargetCameraAlpha: 1
m_TargetCamera3DLayout: 0
m_TargetCamera: {fileID: 0}
2 changes: 1 addition & 1 deletion Assets/Scenes/3DSampleScene.unity
@@ -336,7 +336,7 @@ VideoPlayer:
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 636196907}
m_Enabled: 1
m_VideoClip: {fileID: 32900000, guid: 331e98e799a96d845bfe6c1a73a92c37, type: 3}
m_VideoClip: {fileID: 32900000, guid: 2258e888fb73969408bc31cee3e518d3, type: 3}
m_TargetCameraAlpha: 1
m_TargetCamera3DLayout: 0
m_TargetCamera: {fileID: 0}
31 changes: 25 additions & 6 deletions Assets/Script/PoseVisuallizer.cs
@@ -8,9 +8,7 @@ public class PoseVisuallizer : MonoBehaviour
[SerializeField] WebCamInput webCamInput;
[SerializeField] RawImage inputImageUI;
[SerializeField] Shader shader;
[SerializeField] BlazePoseResource blazePoseResource;
[SerializeField, Range(0, 1)] float humanExistThreshold = 0.5f;
[SerializeField] BlazePoseModel poseLandmarkModel;

Material material;
BlazePoseDetecter detecter;
@@ -32,22 +30,43 @@ public class PoseVisuallizer : MonoBehaviour

void Start(){
material = new Material(shader);
detecter = new BlazePoseDetecter(blazePoseResource, poseLandmarkModel);
detecter = new BlazePoseDetecter();
}

void LateUpdate(){
inputImageUI.texture = webCamInput.inputImageTexture;

// Predict the pose with the neural network model.
// The model can be switched at any time via the 2nd argument.
detecter.ProcessImage(webCamInput.inputImageTexture, poseLandmarkModel);
detecter.ProcessImage(webCamInput.inputImageTexture);

// Output the landmark values (33 values) and the score of whether a human pose is visible (1 value).
for(int i = 0; i < detecter.vertexCount + 1; i++){
/*
Indices 0~32 are pose landmarks.
See the MediaPipe document below for the relation between index and landmark position.
https://google.github.io/mediapipe/solutions/pose#pose-landmark-model-blazepose-ghum-3d
Each element's components are:
x: x coordinate of the pose landmark ([0, 1]).
y: y coordinate of the pose landmark ([0, 1]).
z: Landmark depth, with the depth at the hip midpoint as the origin.
The smaller the value, the closer the landmark is to the camera ([0, 1]).
This value is available in full-body mode only.
**Use of this value is not recommended because it is still in development.**
w: Score of whether the landmark position is visible ([0, 1]).
Index 33 is the score of whether a human pose is visible ([0, 1]).
This element is (score, 0, 0, 0).
*/
Debug.LogFormat("{0}: {1}", i, detecter.GetPoseLandmark(i));
}
Debug.Log("---");
}

void OnRenderObject(){
var w = inputImageUI.rectTransform.rect.width;
var h = inputImageUI.rectTransform.rect.height;

// Set predicted pose landmark results.
// Use the predicted pose landmark results held in ComputeBuffer (GPU) memory.
material.SetBuffer("_vertices", detecter.outputBuffer);
// Set pose landmark counts.
material.SetInt("_keypointCount", detecter.vertexCount);
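
The net effect of this file's changes is that callers no longer manage resources or model selection. A minimal usage sketch under those v1.2.0 semantics (the namespace import is an assumption; `GetPoseLandmark` is assumed to return a `Vector4`, matching the x/y/z/w description in the comment above):

using UnityEngine;
using Mediapipe.BlazePose; // package namespace (assumed)

public class MinimalPoseReader : MonoBehaviour
{
    [SerializeField] Texture sourceTexture; // any Texture: webcam, video or image

    BlazePoseDetecter detecter;

    void Start(){
        // v1.2.0: BlazePoseResource is loaded automatically; no constructor arguments.
        detecter = new BlazePoseDetecter();
    }

    void LateUpdate(){
        // v1.2.0: the BlazePoseModel argument is optional; a Texture is enough.
        detecter.ProcessImage(sourceTexture);

        // CPU-side access via the new GetPoseLandmark method.
        // Index 0 is the nose in the MediaPipe topology; w is the visibility score.
        Vector4 nose = detecter.GetPoseLandmark(0);
        if(nose.w > 0.5f) Debug.Log($"Nose at ({nose.x:F2}, {nose.y:F2})");
    }
}
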
26 changes: 20 additions & 6 deletions Assets/Script/PoseVisuallizer3D.cs
@@ -9,9 +9,7 @@ public class PoseVisuallizer3D : MonoBehaviour
[SerializeField] WebCamInput webCamInput;
[SerializeField] RawImage inputImageUI;
[SerializeField] Shader shader;
[SerializeField] BlazePoseResource blazePoseResource;
[SerializeField, Range(0, 1)] float humanExistThreshold = 0.5f;
[SerializeField] BlazePoseModel poseLandmarkModel;

Material material;
BlazePoseDetecter detecter;
@@ -33,7 +31,7 @@ public class PoseVisuallizer3D : MonoBehaviour

void Start(){
material = new Material(shader);
detecter = new BlazePoseDetecter(blazePoseResource, poseLandmarkModel);
detecter = new BlazePoseDetecter();
}

void Update(){
@@ -44,12 +42,28 @@ void LateUpdate(){
inputImageUI.texture = webCamInput.inputImageTexture;

// Predict the pose with the neural network model.
// The model can be switched at any time via the 2nd argument.
detecter.ProcessImage(webCamInput.inputImageTexture, poseLandmarkModel);
detecter.ProcessImage(webCamInput.inputImageTexture);

// Output the landmark values (33 values) and the score of whether a human pose is visible (1 value).
for(int i = 0; i < detecter.vertexCount + 1; i++){
/*
Indices 0~32 are pose world landmarks.
See the MediaPipe document below for the relation between index and landmark position.
https://google.github.io/mediapipe/solutions/pose#pose-landmark-model-blazepose-ghum-3d
Each element's components are:
x, y and z: Real-world 3D coordinates in meters, with the origin at the midpoint between the hips.
w: Score of whether the world landmark position is visible ([0, 1]).
Index 33 is the score of whether a human pose is visible ([0, 1]).
This element is (score, 0, 0, 0).
*/
Debug.LogFormat("{0}: {1}", i, detecter.GetPoseWorldLandmark(i));
}
Debug.Log("---");
}

void OnRenderObject(){
// Set predicted pose world landmark results.
// Use the predicted pose world landmark results held in ComputeBuffer (GPU) memory.
material.SetBuffer("_worldVertices", detecter.worldLandmarkBuffer);
// Set pose landmark counts.
material.SetInt("_keypointCount", detecter.vertexCount);
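
Because world landmarks are metric, the new CPU accessor enables simple measurements without touching the GPU buffers. A hypothetical helper, reusing a `detecter` set up as in this script (wrist indices 15/16 follow the MediaPipe topology linked above):

// Hypothetical helper; assumes a constructed and per-frame-updated `detecter`
// as in the script above.
void LogWristSpan(BlazePoseDetecter detecter){
    Vector4 l = detecter.GetPoseWorldLandmark(15); // left wrist
    Vector4 r = detecter.GetPoseWorldLandmark(16); // right wrist
    if(l.w > 0.5f && r.w > 0.5f){
        // World landmarks are in meters with the origin at the hip midpoint,
        // so distances between landmarks are directly metric.
        float span = Vector3.Distance((Vector3)l, (Vector3)r);
        Debug.Log($"Wrist-to-wrist distance: {span:F2} m");
    }
}
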
7 changes: 7 additions & 0 deletions Packages/BlazePoseBarracuda/CHANGELOG.md
@@ -1,3 +1,10 @@
### [1.2.0] - 2022-02-15
- Improved the stability of pose estimation.
- Fixed an issue where pose estimation was not performed correctly when the color space was Linear.
- Added new methods (`GetPoseLandmark` and `GetPoseWorldLandmark`) for accessing data from the CPU (C#).
- `BlazePoseResource` asset data is now loaded automatically; the constructor arguments are no longer required.
- The `BlazePoseModel` argument is now optional. The only required argument for inference is the `Texture` data.

### [1.1.1] - 2021-10-04
Changed the install method from GitHub URL to [npmjs.com](https://www.npmjs.com/).
No source code changes.
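
In code, the API simplification described above amounts to the following; the "before" calls are taken from the sample-script diffs in this commit, while the explicit-model overload at the end is an assumption based on the `BlazePoseModel` argument being optional:

// Before (v1.1.x): resource asset and model type had to be supplied.
detecter = new BlazePoseDetecter(blazePoseResource, poseLandmarkModel);
detecter.ProcessImage(webCamInput.inputImageTexture, poseLandmarkModel);

// After (v1.2.0): resources auto-load and the model argument is optional.
detecter = new BlazePoseDetecter();
detecter.ProcessImage(webCamInput.inputImageTexture);

// Presumably the model can still be selected explicitly when needed:
detecter.ProcessImage(webCamInput.inputImageTexture, BlazePoseModel.full);
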
154 changes: 103 additions & 51 deletions Packages/BlazePoseBarracuda/ComputeShader/Process.compute
@@ -5,17 +5,19 @@

#define PI 3.14159265359

#include "UnityCG.cginc"
#include "PoseRegion.cginc"
#include "LowPassFilter.cginc"
#include "Misc.cginc"
#include "Packages/jp.ikep.mediapipe.posedetection/ComputeShader/Common.cginc"


// Kernel 0
uint _isLinerColorSpace;
uint _letterboxWidth;
float2 _spadScale;
sampler2D _letterboxInput;
RWTexture2D<float4> _letterboxTexture;
RWStructuredBuffer<float> _letterboxTextureBuffer;

// Generate letter-box image texture.
[numthreads(8, 8, 1)]
@@ -24,7 +26,7 @@ void LetterBoxImage(uint2 id : SV_DispatchThreadID)
if (any(id > _letterboxWidth)) return;

// UV coordinates
float2 uv = (id + 0.5) / _letterboxWidth;
float2 uv = float2(0.5 + id.x, _letterboxWidth - 0.5 - id.y) / _letterboxWidth;

// Scaling
uv = (uv - 0.5) * _spadScale + 0.5;
@@ -39,7 +41,13 @@ void LetterBoxImage(uint2 id : SV_DispatchThreadID)
// Bounding
rgb *= all(uv > 0) && all (uv < 1);

_letterboxTexture[id] = float4(rgb, 1);
// Convert to sRGB (gamma) color when in the Linear color space, because compute shader output is not color-converted automatically.
if(_isLinerColorSpace) rgb = LinearToGammaSpace(rgb);

uint offs = (id.y * _letterboxWidth + id.x) * 3;
_letterboxTextureBuffer[offs + 0] = rgb.r;
_letterboxTextureBuffer[offs + 1] = rgb.g;
_letterboxTextureBuffer[offs + 2] = rgb.b;
}
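
The letterbox kernel now writes a tightly packed RGB float buffer rather than an RWTexture2D. A hedged sketch of what the matching C# setup could look like (only the shader property and kernel names come from the code above; the 224-pixel width, the aspect-scale helper, and the per-frame dispatch are illustrative assumptions):

using UnityEngine;

public class LetterboxDispatchSketch : MonoBehaviour
{
    [SerializeField] ComputeShader compute;   // Process.compute
    [SerializeField] Texture sourceTexture;

    ComputeBuffer letterboxBuffer;
    const int LetterboxWidth = 224; // assumed network input resolution

    void Start(){
        // Packed RGB floats: width * width pixels, 3 floats per pixel.
        letterboxBuffer = new ComputeBuffer(LetterboxWidth * LetterboxWidth * 3, sizeof(float));
    }

    void Update(){
        int kernel = compute.FindKernel("LetterBoxImage");
        compute.SetInt("_letterboxWidth", LetterboxWidth);
        // Convert back to gamma only when the project runs in the Linear color space.
        compute.SetInt("_isLinerColorSpace",
            QualitySettings.activeColorSpace == ColorSpace.Linear ? 1 : 0);
        compute.SetVector("_spadScale", AspectScale());
        compute.SetTexture(kernel, "_letterboxInput", sourceTexture);
        compute.SetBuffer(kernel, "_letterboxTextureBuffer", letterboxBuffer);
        // 8x8 threads per group, as declared in [numthreads(8, 8, 1)].
        compute.Dispatch(kernel, LetterboxWidth / 8, LetterboxWidth / 8, 1);
    }

    Vector2 AspectScale(){
        // Scale that pads the source to a square while keeping its aspect ratio.
        float aspect = (float)sourceTexture.width / sourceTexture.height;
        return aspect > 1 ? new Vector2(1, aspect) : new Vector2(1 / aspect, 1);
    }

    void OnDestroy() => letterboxBuffer?.Dispose();
}
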


@@ -106,7 +114,7 @@ void PoseRegionUpdate(uint id : SV_DispatchThreadID)

sampler2D _inputTexture;
StructuredBuffer<PoseRegion> _cropRegion;
RWTexture2D<float4> _cropedTexture;
RWStructuredBuffer<float> _cropedTextureBuffer;

// Crop image from PoseRegion.
[numthreads(8, 8, 1)]
@@ -115,7 +123,7 @@ void CropImage(uint2 id : SV_DispatchThreadID)
float4x4 xform = _cropRegion[0].cropMatrix;

// UV coordinates
float2 uv = (id + 0.5) / CROP_IMAGE_SIZE;
float2 uv = float2(0.5 + id.x, CROP_IMAGE_SIZE - 0.5 - id.y) / CROP_IMAGE_SIZE;
uv = mul(xform, float4(uv, 0, 1)).xy;

// De-letterboxing
@@ -128,23 +136,32 @@ void CropImage(uint2 id : SV_DispatchThreadID)
// Texture sample
float3 rgb = tex2Dgrad(_inputTexture, uv, duv_dx, duv_dy).rgb;

// Set pixel value.
_cropedTexture[id] = float4(rgb, 1);
// Convert to sRGB (gamma) color when in the Linear color space, because compute shader output is not color-converted automatically.
if(_isLinerColorSpace) rgb = LinearToGammaSpace(rgb);

uint offs = (id.y * CROP_IMAGE_SIZE + id.x) * 3;
_cropedTextureBuffer[offs + 0] = rgb.r;
_cropedTextureBuffer[offs + 1] = rgb.g;
_cropedTextureBuffer[offs + 2] = rgb.b;
}


// Kernel 3

#define LPF_WINDOW_MAX_COUNT 5

uint _isWorldProcess;
uint _keypointCount;
float _postDeltatime;
float _velocity_scale;
uint _rvfWindowCount;

StructuredBuffer<float4> _postInput;
StructuredBuffer<float4> _postInputWorld;
StructuredBuffer<PoseRegion> _postRegion;

RWStructuredBuffer<float4> _postDeltaOutput;
RWStructuredBuffer<float> _postRvfWindowBuffer;
RWStructuredBuffer<float3> _postLastValueScale;
RWStructuredBuffer<float4> _postOutput;
RWStructuredBuffer<float4> _postDeltaOutputWorld;
RWStructuredBuffer<float4> _postOutputWorld;

// Map pose landmarks to coordinates on the original input image.
[numthreads(33 + 1, 1, 1)]
@@ -155,7 +172,6 @@ void PostProcess(uint id : SV_DispatchThreadID)
if(id == _keypointCount){
// Set the human-existence score at the last index.
_postOutput[id] = _postInput[_keypointCount];
_postOutputWorld[id] = _postInputWorld[_keypointCount];
}
else{
// Process for normalized landmark
@@ -166,45 +182,81 @@ void PostProcess(uint id : SV_DispatchThreadID)
float3 x = _postInput[id].xyz;
// Pose landmark on previous frame
float3 p_x = _postOutput[id].xyz;
// Pose landmark delta
float3 p_dx = _postDeltaOutput[id].xyz;

// Map from cropped-image coordinates to letter-box image coordinates.
x = mul(region.cropMatrix, float4(x, 1)).xyz;
// Map from letter-box image coordinates to original input image coordinates.
x.xy = (x.xy - 0.5) * _spadScale + 0.5;

// Apply a low-pass filter to decrease jitter.
float3 lpf_params = float3(30, 1.5, _postDeltatime);
float3 dx = lpf_Step_dx(x, p_x, p_dx, lpf_params);
x = lpf_Step_x(x, p_x, dx, lpf_params);

// Set pose landmark delta for the next frame processing.
_postDeltaOutput[id] = float4(dx, 1);
// Pose landmark final result
_postOutput[id] = float4(x, score);
// Previous value scale for the relative velocity filter.
float3 p_value_scale = _postLastValueScale[id];

if(_isWorldProcess){
x = mul(makeRotationMatrix(region.box.w), float4(x, 1)).xyz;
}
else{
// Map from cropped-image coordinates to letter-box image coordinates.
x = mul(region.cropMatrix, float4(x, 1)).xyz;
// Map from letter-box image coordinates to original input image coordinates.
x.xy = (x.xy - 0.5) * _spadScale + 0.5;
}

// Apply the relative velocity filter.
// Reference:
// https://github.com/asus4/tf-lite-unity-sample/blob/26e49bf4a45a550f84f12635a97102a3e207009e/Packages/com.github.asus4.mediapipe/Runtime/RelativeVelocityFilter.cs
float min_x = _postInput[0].x;
float max_x = _postInput[0].x;
float min_y = _postInput[0].y;
float max_y = _postInput[0].y;
for(uint k = 0; k < _keypointCount; k++){
if(min_x > _postInput[k].x) min_x = _postInput[k].x;
if(max_x < _postInput[k].x) max_x = _postInput[k].x;
if(min_y > _postInput[k].y) min_y = _postInput[k].y;
if(max_y < _postInput[k].y) max_y = _postInput[k].y;
}

float2 size = float2(max_x - min_x, max_y - min_y);
float3 value_scale = 1.0f / ((size.x + size.y) / 2);
int window_size = min(_rvfWindowCount, LPF_WINDOW_MAX_COUNT);
if(window_size > 0){
float3 distance = x * value_scale - p_x * p_value_scale;
float3 distance_sum = distance;
float duration_sum = _postDeltatime;
float duration_max = (1 + window_size) * _postDeltatime;

for(int i = 0; i < window_size; i++){
uint offs = (i * _keypointCount + id) * 4;
float3 p_distance = float3(_postRvfWindowBuffer[offs + 0], _postRvfWindowBuffer[offs + 1], _postRvfWindowBuffer[offs + 2]);
float p_duration = _postRvfWindowBuffer[offs + 3];
if(duration_sum + p_duration > duration_max) break;
distance_sum += p_distance;
duration_sum += p_duration;
}

float3 velocity = distance_sum / duration_sum;
float3 alpha = 1.0 - 1.0 / (1.0 + _velocity_scale * abs(velocity));
x = lerp(p_x, x, alpha);

// Window enqueue & dequeue for the next frame.
if(window_size == LPF_WINDOW_MAX_COUNT){
for(int j = 0; j < window_size - 1; j++){
uint offs1 = (j * _keypointCount + id) * 4;
uint offs2 = ((j + 1) * _keypointCount + id) * 4;
_postRvfWindowBuffer[offs1 + 0] = _postRvfWindowBuffer[offs2 + 0];
_postRvfWindowBuffer[offs1 + 1] = _postRvfWindowBuffer[offs2 + 1];
_postRvfWindowBuffer[offs1 + 2] = _postRvfWindowBuffer[offs2 + 2];
_postRvfWindowBuffer[offs1 + 3] = _postRvfWindowBuffer[offs2 + 3];
}
uint offs = ((window_size - 1) * _keypointCount + id) * 4;
_postRvfWindowBuffer[offs + 0] = distance.x;
_postRvfWindowBuffer[offs + 1] = distance.y;
_postRvfWindowBuffer[offs + 2] = distance.z;
_postRvfWindowBuffer[offs + 3] = _postDeltatime;
}
else{
uint offs = (window_size * _keypointCount + id) * 4;
_postRvfWindowBuffer[offs + 0] = distance.x;
_postRvfWindowBuffer[offs + 1] = distance.y;
_postRvfWindowBuffer[offs + 2] = distance.z;
_postRvfWindowBuffer[offs + 3] = _postDeltatime;
}
}

// ---------

// Process for world landmark
// Visibility of the pose world landmark
score = _postInputWorld[id].w;
// Pose world landmark
x = _postInputWorld[id].xyz;
// Pose world landmark on previous frame
p_x = _postOutputWorld[id].xyz;
// Pose world landmark delta
p_dx =_postDeltaOutputWorld[id].xyz;

x = mul(makeRotationMatrix(region.box.w), float4(x, 1)).xyz;

// Apply a low-pass filter to decrease jitter.
dx = lpf_Step_dx(x, p_x, p_dx, lpf_params);
x = lpf_Step_x(x, p_x, dx, lpf_params);

// Set pose world landmark delta for the next frame processing.
_postDeltaOutputWorld[id] = float4(dx, 1);
// Pose world landmark final result
_postOutputWorld[id] = float4(x, score);
_postOutput[id] = float4(x, score);
_postLastValueScale[id] = value_scale;
}
}
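
For reference, here is a hedged C# sketch of the relative-velocity-filter step the kernel above performs per landmark, modeled on the linked RelativeVelocityFilter.cs; the window bookkeeping is simplified and all names are illustrative:

using System.Collections.Generic;
using UnityEngine;

// Hedged sketch of the relative velocity filter; one instance filters one landmark.
class RelativeVelocityFilterSketch
{
    const int WindowMaxCount = 5; // mirrors LPF_WINDOW_MAX_COUNT
    readonly Queue<(Vector3 distance, float duration)> window = new Queue<(Vector3, float)>();
    Vector3 lastValue;
    float lastValueScale;
    bool hasLast;

    // value: raw landmark; valueScale: 1 / mean landmark bounding-box edge,
    // computed per frame from the landmark extents as in the shader; dt: frame delta time.
    public Vector3 Apply(Vector3 value, float valueScale, float dt, float velocityScale)
    {
        if(!hasLast){
            hasLast = true;
            lastValue = value;
            lastValueScale = valueScale;
            return value;
        }

        // Distance in size-normalized units, robust to the person's scale on screen.
        Vector3 distance = value * valueScale - lastValue * lastValueScale;

        // Estimate velocity from a short window of recent distances/durations.
        Vector3 distanceSum = distance;
        float durationSum = dt;
        float durationMax = (1 + window.Count) * dt;
        foreach(var (d, t) in window){
            if(durationSum + t > durationMax) break;
            distanceSum += d;
            durationSum += t;
        }
        Vector3 velocity = distanceSum / durationSum;

        // Fast motion -> alpha near 1 (follow the new value); slow motion -> smooth heavily.
        Vector3 filtered = new Vector3(
            Smooth(lastValue.x, value.x, velocity.x, velocityScale),
            Smooth(lastValue.y, value.y, velocity.y, velocityScale),
            Smooth(lastValue.z, value.z, velocity.z, velocityScale));

        // Enqueue the newest sample, dropping the oldest when the window is full.
        if(window.Count == WindowMaxCount) window.Dequeue();
        window.Enqueue((distance, dt));
        lastValue = filtered;
        lastValueScale = valueScale;
        return filtered;
    }

    static float Smooth(float prev, float next, float velocity, float velocityScale){
        float alpha = 1f - 1f / (1f + velocityScale * Mathf.Abs(velocity));
        return Mathf.Lerp(prev, next, alpha);
    }
}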