
Commit
Merge pull request #66 from tucan9389/3d-pose
[PR] Implement post-process of 3d pose estimation (SimpleBaseline)
tucan9389 authored Mar 17, 2021
2 parents da2931e + 832c8be commit c31d8dd
Showing 21 changed files with 1,280 additions and 324 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -93,4 +93,5 @@ Pods/*
 Podfile.lock
 
 # TensorFlow Lite Model
-*.tflite
+*.tflite
+.DS_Store
2 changes: 1 addition & 1 deletion Podfile
@@ -6,6 +6,6 @@ target 'PoseEstimation-TFLiteSwift' do
   use_frameworks!
 
   # Pods for PoseEstimation-TFLiteSwift
-  pod 'TensorFlowLiteSwift'
+  pod 'TensorFlowLiteSwift/CoreML', '~> 2.4.0'
 
 end
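Switching to the `TensorFlowLiteSwift/CoreML` subspec pins TFLite 2.4 and pulls in the Core ML delegate, which can run supported models on the Apple Neural Engine. A minimal sketch of opting in at interpreter creation, per the public TFLite Swift API (the model path and function name are placeholders, not code from this PR):

```swift
import TensorFlowLite

// CoreMLDelegate() is failable: by default it returns nil on devices
// without Neural Engine support, so keep the plain CPU interpreter as a fallback.
func makeInterpreter(modelPath: String) throws -> Interpreter {
    if let coreMLDelegate = CoreMLDelegate() {
        return try Interpreter(modelPath: modelPath, delegates: [coreMLDelegate])
    }
    return try Interpreter(modelPath: modelPath)
}
```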
112 changes: 69 additions & 43 deletions PoseEstimation-TFLiteSwift.xcodeproj/project.pbxproj

Large diffs are not rendered by default.

442 changes: 263 additions & 179 deletions PoseEstimation-TFLiteSwift/Base.lproj/Main.storyboard

Large diffs are not rendered by default.


16 changes: 13 additions & 3 deletions PoseEstimation-TFLiteSwift/CVPixelBufferExtension.swift
@@ -115,7 +115,7 @@ extension CVPixelBuffer {
     /// floating point values).
     /// - Returns: The RGB data representation of the image buffer or `nil` if the buffer could not be
     /// converted.
-    func rgbData(byteCount: Int, isNormalized: Bool = false, isModelQuantized: Bool) -> Data? {
+    func rgbData(byteCount: Int, normalization: TFLiteImageInterpreter.NormalizationOptions = .none, isModelQuantized: Bool) -> Data? {
         CVPixelBufferLockBaseAddress(self, .readOnly)
         defer { CVPixelBufferUnlockBaseAddress(self, .readOnly) }
         guard let sourceData = CVPixelBufferGetBaseAddress(self) else {
@@ -165,9 +165,19 @@ extension CVPixelBuffer {
         if isModelQuantized { return imageByteData }
 
         let imageBytes = [UInt8](imageByteData)
-        let bytes: [Float]
-        if isNormalized {
+        var bytes: [Float] = []
+        if normalization == .scaledNormalization {
             bytes = imageBytes.map { Float($0) / 255.0 } // normalization
+        } else if normalization == .pytorchNormalization {
+            // bytes = imageBytes.map { Float($0) / 255.0 } // normalization
+            bytes = imageBytes.map { Float($0) } // normalization
+            for i in 0 ..< width * height {
+                bytes[i                     ] = (Float32(imageBytes[i * 3 + 0]) - 0.485) / 0.229 // R
+                bytes[width * height + i    ] = (Float32(imageBytes[i * 3 + 1]) - 0.456) / 0.224 // G
+                bytes[width * height * 2 + i] = (Float32(imageBytes[i * 3 + 2]) - 0.406) / 0.225 // B
+            }
+        } else if normalization == .meanStdNormalization {
+            assert(false, "not support '.meanStdNormalization'")
         } else {
             bytes = imageBytes.map { Float($0) } // not normalization
         }
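The `.pytorchNormalization` branch reorders interleaved RGB (HWC) into planar channels (CHW) while applying the ImageNet per-channel mean and std. Note that the commented-out `/ 255.0` line hints at the usual convention: torchvision's `Normalize` expects pixels scaled to [0, 1] before the mean is subtracted. A standalone sketch of that standard preprocessing (helper name hypothetical, not part of this PR):

```swift
// Standalone sketch of torchvision-style preprocessing, HWC UInt8 -> CHW Float.
// Canonical form: value = (pixel / 255 - mean[c]) / std[c].
func imagenetNormalizedCHW(_ rgb: [UInt8], width: Int, height: Int) -> [Float] {
    let mean: [Float] = [0.485, 0.456, 0.406]
    let std: [Float] = [0.229, 0.224, 0.225]
    let planeSize = width * height
    var out = [Float](repeating: 0, count: planeSize * 3)
    for i in 0..<planeSize {
        for c in 0..<3 {
            let scaled = Float(rgb[i * 3 + c]) / 255.0  // scale to [0, 1] first
            out[c * planeSize + i] = (scaled - mean[c]) / std[c]  // then standardize
        }
    }
    return out
}
```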
8 changes: 4 additions & 4 deletions PoseEstimation-TFLiteSwift/LiveLineHeatmapViewController.swift
@@ -122,7 +122,7 @@ class LiveLineHeatmapViewController: UIViewController {
                                   humanType: humanType)
     }
 
-    let poseEstimator: PoseEstimator = IMGCLSPoseEstimator()
+    let poseEstimator: PoseEstimator = Baseline3DPoseEstimator()
 
     override func viewDidLoad() {
         super.viewDidLoad()
@@ -292,10 +292,10 @@ extension LiveLineHeatmapViewController {
         DispatchQueue.main.async {
             if let partOffset = self.selectedPartIndex {
                 self.lineDotView?.lines = []
-                self.lineDotView?.keypoints = output.humans.map { $0.keypoints[partOffset] }
+                self.lineDotView?.keypoints = output.humans2d.compactMap { $0 }.map { $0.keypoints[partOffset] }
             } else { // ALL case
-                self.lineDotView?.lines = output.humans.reduce([]) { $0 + $1.lines }
-                self.lineDotView?.keypoints = output.humans.reduce([]) { $0 + $1.keypoints }
+                self.lineDotView?.lines = output.humans2d.compactMap { $0 }.reduce([]) { $0 + $1.lines }
+                self.lineDotView?.keypoints = output.humans2d.compactMap { $0 }.reduce([]) { $0 + $1.keypoints }
             }
 
             if let partOffset = self.selectedPartIndex {
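The view controller now reads `output.humans2d` and drops `nil` entries, while the OpenPose estimator below wraps its results as `.human2d(human:)`. The new output type's declaration is not part of the rendered hunks; a plausible sketch consistent with both call sites (`Human2D`/`Human3D` are the PR's types, everything else here is an assumption):

```swift
// Assumed shape of the refactored output, inferred from call sites in this diff.
enum Human {
    case human2d(human: Human2D)
    case human3d(human: Human3D)
}

struct PoseEstimationOutput {
    var humans: [Human] = []

    /// 2D view of `humans`: 3D entries become nil, which callers
    /// drop with `compactMap { $0 }`.
    var humans2d: [Human2D?] {
        return humans.map { human -> Human2D? in
            if case .human2d(let human2d) = human { return human2d }
            return nil
        }
    }
}
```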
48 changes: 24 additions & 24 deletions PoseEstimation-TFLiteSwift/OpenPose/OpenPosePoseEstimator.swift
@@ -33,7 +33,7 @@ class OpenPosePoseEstimator: PoseEstimator {
             inputWidth: Input.width,
             inputHeight: Input.height,
             isGrayScale: Input.isGrayScale,
-            isNormalized: Input.isNormalized
+            normalization: Input.normalization
         )
         let imageInterpreter = TFLiteImageInterpreter(options: options)
         return imageInterpreter
@@ -88,7 +88,7 @@ private extension OpenPosePoseEstimator {
         static let width = 432
         static let height = 368
         static let isGrayScale = false
-        static let isNormalized = false
+        static let normalization = TFLiteImageInterpreter.NormalizationOptions.none
     }
     struct Output {
         struct ConfidenceMap { // similar to Heatmap
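The boolean `isNormalized` flag becomes a `NormalizationOptions` value. The enum's declaration is not shown in this diff; from the four cases exercised in `rgbData(byteCount:normalization:isModelQuantized:)` it plausibly looks like this (a sketch, not the committed source):

```swift
extension TFLiteImageInterpreter {
    // Assumed declaration, inferred from the cases used across this PR.
    enum NormalizationOptions {
        case none                  // keep raw 0...255 values
        case scaledNormalization   // x / 255
        case pytorchNormalization  // ImageNet mean/std over CHW planes
        case meanStdNormalization  // rejected by rgbData(...) for now
    }
}
```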
@@ -233,18 +233,18 @@ private extension PoseEstimationOutput {
             let human = parseSinglePerson(outputs,
                                           partIndex: postprocessOptions.bodyPart,
                                           partThreshold: postprocessOptions.partThreshold)
-            humans = [human]
+            humans = [.human2d(human: human)]
         case .multiPerson(let pairThreshold, let nmsFilterSize, let maxHumanNumber):
             humans = parseMultiHuman(outputs,
                                      partIndex: postprocessOptions.bodyPart,
                                      partThreshold: postprocessOptions.partThreshold,
                                      pairThreshold: pairThreshold,
                                      nmsFilterSize: nmsFilterSize,
-                                     maxHumanNumber: maxHumanNumber)
+                                     maxHumanNumber: maxHumanNumber).map { .human2d(human: $0) }
         }
     }
 
-    func parseSinglePerson(_ outputs: [TFLiteFlatArray<Float32>], partIndex: Int?, partThreshold: Float?) -> Human {
+    func parseSinglePerson(_ outputs: [TFLiteFlatArray<Float32>], partIndex: Int?, partThreshold: Float?) -> Human2D {
         // openpose_ildoonet.tflite only use the first output
         let output = outputs[0]
 
@@ -263,28 +263,28 @@ private extension PoseEstimationOutput {
             return (point: CGPoint(x: x, y: y), score: score)
         }
 
-        let keypoints: [Keypoint?] = keypointInfos
-            .map { keypointInfo -> Keypoint? in Keypoint(position: keypointInfo.point, score: keypointInfo.score) }
-            .map { keypointInfo -> Keypoint? in
+        let keypoints: [Keypoint2D?] = keypointInfos
+            .map { keypointInfo -> Keypoint2D? in Keypoint2D(position: keypointInfo.point, score: keypointInfo.score) }
+            .map { keypointInfo -> Keypoint2D? in
                 guard let score = keypointInfo?.score, let partThreshold = partThreshold else { return keypointInfo }
                 return (score > partThreshold) ? keypointInfo : nil
             }
 
         // lines
-        var keypointWithBodyPart: [OpenPosePoseEstimator.Output.BodyPart: Keypoint] = [:]
+        var keypointWithBodyPart: [OpenPosePoseEstimator.Output.BodyPart: Keypoint2D] = [:]
         OpenPosePoseEstimator.Output.BodyPart.allCases.enumerated().forEach { (index, bodyPart) in
             keypointWithBodyPart[bodyPart] = keypoints[index]
         }
-        let lines: [Human.Line] = OpenPosePoseEstimator.Output.BodyPart.lines.compactMap { line in
+        let lines: [Human2D.Line2D] = OpenPosePoseEstimator.Output.BodyPart.lines.compactMap { line in
             guard let fromKeypoint = keypointWithBodyPart[line.from],
                   let toKeypoint = keypointWithBodyPart[line.to] else { return nil }
             return (from: fromKeypoint, to: toKeypoint)
         }
 
-        return Human(keypoints: keypoints, lines: lines)
+        return Human2D(keypoints: keypoints, lines: lines)
     }
 
-    func parseMultiHuman(_ outputs: [TFLiteFlatArray<Float32>], partIndex: Int?, partThreshold: Float?, pairThreshold: Float?, nmsFilterSize: Int, maxHumanNumber: Int?) -> [Human] {
+    func parseMultiHuman(_ outputs: [TFLiteFlatArray<Float32>], partIndex: Int?, partThreshold: Float?, pairThreshold: Float?, nmsFilterSize: Int, maxHumanNumber: Int?) -> [Human2D] {
         // openpose_ildoonet.tflite only use the first output
         let output = outputs[0]
 
@@ -303,15 +303,15 @@
         }
     }
 
-    func parseSinglePartOnMultiHuman(_ output: TFLiteFlatArray<Float32>, partIndex: Int, partThreshold: Float?, nmsFilterSize: Int = 3) -> [Human] {
+    func parseSinglePartOnMultiHuman(_ output: TFLiteFlatArray<Float32>, partIndex: Int, partThreshold: Float?, nmsFilterSize: Int = 3) -> [Human2D] {
         // process NMS
         let keypointIndexes = output.keypoints(partIndex: partIndex,
                                                filterSize: nmsFilterSize,
                                                threshold: partThreshold)
 
         // convert col,row to Keypoint
-        let kps: [Keypoint] = keypointIndexes.map { keypointInfo in
-            return Keypoint(column: keypointInfo.col,
+        let kps: [Keypoint2D] = keypointIndexes.map { keypointInfo in
+            return Keypoint2D(column: keypointInfo.col,
                             row: keypointInfo.row,
                             width: OpenPosePoseEstimator.Output.ConfidenceMap.width,
                             height: OpenPosePoseEstimator.Output.ConfidenceMap.height,
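`output.keypoints(partIndex:filterSize:threshold:)` performs the non-maximum suppression mentioned in the `// process NMS` comment: a confidence-map cell survives only if it is the maximum within a `filterSize` window and above the threshold. That extension's body is outside this diff; a minimal standalone sketch of the idea over a row-major heatmap (function name hypothetical):

```swift
// Standalone sketch: window-based NMS over a row-major confidence map.
// Keeps (col, row, score) cells that are local maxima above `threshold`.
func localMaxima(heatmap: [Float], width: Int, height: Int,
                 filterSize: Int = 3, threshold: Float = 0.1) -> [(col: Int, row: Int, score: Float)] {
    let radius = filterSize / 2
    var results: [(col: Int, row: Int, score: Float)] = []
    for row in 0..<height {
        for col in 0..<width {
            let score = heatmap[row * width + col]
            guard score >= threshold else { continue }
            var isMax = true
            for dy in -radius...radius where isMax {
                for dx in -radius...radius {
                    let r = row + dy, c = col + dx
                    guard r >= 0, r < height, c >= 0, c < width else { continue }
                    if heatmap[r * width + c] > score { isMax = false; break }
                }
            }
            if isMax { results.append((col, row, score)) }
        }
    }
    return results
}
```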
@@ -320,14 +320,14 @@
 
         // Make [Human]
         return kps.map { keypoint in
-            let keypoints: [Keypoint?] = OpenPosePoseEstimator.Output.BodyPart.allCases.enumerated().map { offset, _ in
+            let keypoints: [Keypoint2D?] = OpenPosePoseEstimator.Output.BodyPart.allCases.enumerated().map { offset, _ in
                 return (offset == partIndex) ? keypoint : nil
             }
-            return Human(keypoints: keypoints, lines: [])
+            return Human2D(keypoints: keypoints, lines: [])
         }
     }
 
-    func parseAllPartOnMultiHuman(_ output: TFLiteFlatArray<Float32>, partIndex: Int?, partThreshold: Float?, pairThreshold: Float?, nmsFilterSize: Int, maxHumanNumber: Int?) -> [Human] {
+    func parseAllPartOnMultiHuman(_ output: TFLiteFlatArray<Float32>, partIndex: Int?, partThreshold: Float?, pairThreshold: Float?, nmsFilterSize: Int, maxHumanNumber: Int?) -> [Human2D] {
 
         let parts = OpenPosePoseEstimator.Output.BodyPart.allCases
         var verticesForEachPart: [[KeypointElement]?] = parts.map { _ in nil }
@@ -447,21 +447,21 @@ private extension PoseEstimationOutput {
             }
         }
 
-        let humans: [Human] = tmpHumans.map { tmpHuman in
-            let keypoints: [Keypoint?] = tmpHuman.enumerated().map { (offset, locationInfo) in
+        let humans: [Human2D] = tmpHumans.map { tmpHuman in
+            let keypoints: [Keypoint2D?] = tmpHuman.enumerated().map { (offset, locationInfo) in
                 guard let locationInfo = locationInfo else { return nil }
-                return Keypoint(column: locationInfo.col,
+                return Keypoint2D(column: locationInfo.col,
                                 row: locationInfo.row,
                                 width: colSize,
                                 height: rowSize,
                                 value: locationInfo.val)
             }
-            let lines: [(from: Keypoint, to: Keypoint)] = pairs.compactMap { pair in
+            let lines: [(from: Keypoint2D, to: Keypoint2D)] = pairs.compactMap { pair in
                 guard let startingKeypoint = keypoints[pair.from.offsetValue()],
                       let endingKeypoint = keypoints[pair.to.offsetValue()] else { return nil }
                 return (from: startingKeypoint, to: endingKeypoint)
             }
-            return Human(keypoints: keypoints, lines: lines)
+            return Human2D(keypoints: keypoints, lines: lines)
         }
         return humans
     }
@@ -499,7 +499,7 @@ private extension TFLiteFlatArray where Element == Float32 {
     }
 }
 
-private extension Keypoint {
+private extension Keypoint2D {
     init(column: Int, row: Int, width: Int, height: Int, value: Float32) {
         let x = (CGFloat(column) + 0.5) / CGFloat(width)
         let y = (CGFloat(row) + 0.5) / CGFloat(height)
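This initializer maps a heatmap cell index to a normalized image coordinate, and the added 0.5 centers the point in its cell rather than on its corner. For example (illustrative numbers, not from this diff): on a 54-column confidence map, column 0 maps to x = 0.5 / 54 ≈ 0.009 and column 53 to x = 53.5 / 54 ≈ 0.991, so keypoints stay strictly inside the unit square instead of landing on the image border.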