camenduru's picture
thanks to openpose ❤
f5bb0c0
#ifndef OPENPOSE_PRIVATE_UTILITIES_RENDER_HU
#define OPENPOSE_PRIVATE_UTILITIES_RENDER_HU
namespace op
{
// Computes, for the person handled by the calling thread, the bounding box of every keypoint whose score is
// above `threshold`, plus a per-person rendering scale.
// Thread mapping: person index = threadIdx.x (blockIdx is not used). Threads with threadIdx.x >= numberPeople
// do nothing.
// Inputs:
//     keypointsPtr: 3 floats (x, y, score) per part, numberParts parts per person
//     targetWidth, targetHeight: initial values of the minimum x and y
// Outputs:
//     minPtr, maxPtr: 2*numberPeople floats each, x values of all people first and then the y values
//     scalePtr: numberPeople floats
// People without any keypoint above `threshold` keep min = (targetWidth, targetHeight) and max = (0, 0), i.e.,
// an empty box, and get scale 1. Everyone else gets the box enlarged by 50 units on each side.
__inline__ __device__ void getBoundingBoxPerPerson(
    float* maxPtr, float* minPtr, float* scalePtr, const unsigned int targetWidth, const unsigned int targetHeight,
    const float* const keypointsPtr, const int numberPeople, const int numberParts, const float threshold)
{
    const unsigned long globalIdx = threadIdx.x;
    // Signed check first: a negative numberPeople would otherwise be converted to a huge unsigned value
    if (numberPeople > 0 && globalIdx < (unsigned long)numberPeople)
    {
        float minValueX = (float)targetWidth;
        float minValueY = (float)targetHeight;
        float maxValueX = 0.f;
        float maxValueY = 0.f;
        // Explicit flag instead of comparing the maxima against 0: keypoints lying exactly on x == 0 or
        // y == 0 are still valid keypoints
        bool anyValidKeypoint = false;
        for (auto part = 0 ; part < numberParts ; part++)
        {
            const unsigned long index = 3u * (globalIdx*numberParts + part);
            const float score = keypointsPtr[index+2];
            if (score > threshold)
            {
                const float keypointX = keypointsPtr[index];
                const float keypointY = keypointsPtr[index+1];
                anyValidKeypoint = true;
                minValueX = fminf(minValueX, keypointX);
                maxValueX = fmaxf(maxValueX, keypointX);
                minValueY = fminf(minValueY, keypointY);
                maxValueY = fmaxf(maxValueY, keypointY);
            }
        }
        // Always written, so that readers of scalePtr never see uninitialized memory
        float personScale = 1.f;
        if (anyValidKeypoint)
        {
            const auto extentX = maxValueX - minValueX;
            const auto extentY = maxValueY - minValueY;
            // (extentX + extentY) / 400.f, i.e., the mean box side divided by 200.
            // fastTruncateCuda is defined elsewhere; presumably it clamps the value to [0.33, 1]
            personScale = fastTruncateCuda((extentX + extentY) / 400.f, 0.33f, 1.f);
            // Margin around the keypoints
            const auto constantToAdd = 50.f;
            maxValueX += constantToAdd;
            maxValueY += constantToAdd;
            minValueX -= constantToAdd;
            minValueY -= constantToAdd;
        }
        scalePtr[globalIdx] = personScale;
        // Layout: x values of all people first, then y values
        const auto xIndex = globalIdx;
        const auto yIndex = numberPeople+globalIdx;
        minPtr[xIndex] = minValueX;
        minPtr[yIndex] = minValueY;
        maxPtr[xIndex] = maxValueX;
        maxPtr[yIndex] = maxValueY;
    }
}
// Note: renderKeypoints is not working for videos with many people, renderKeypointsOld speed was slightly improved instead
// Draws the limbs and keypoint circles of every person onto the single pixel (x, y) owned by the calling thread.
// The function contains a __syncthreads(), so every thread of the block must call it.
// Parameters:
//     targetPtr: image with 3 interleaved floats per pixel (handled here as b, g, r), row-major with
//         targetWidth pixels per row; read (unless blendOriginalFrame is false) and written at (x, y)
//     sharedMaxs, sharedMins, sharedScaleF: scratch buffers filled here from maxPtr, minPtr and scalePtr
//         (presumably __shared__ arrays owned by the calling kernel; 2*numberPeople, 2*numberPeople and
//         numberPeople floats respectively)
//     maxPtr, minPtr: per-person box limits, x values of all people first and then the y values
//     scalePtr: per-person scale
//     globalIdx: index used to distribute the shared-memory load among threads
//     keypointsPtr: 3 floats (x, y, score) per part, numberParts parts per person
//     partPairsPtr: 2 part indices per limb, numberPartPairs limbs
//     rgbColorsPtr: 3 floats per color, indexed by part modulo numberColors
//     keypointScalePtr: per-part scale, indexed by part modulo numberScales and applied cubed
//     radius, lineWidth: base circle radius and limb width (used squared)
//     threshold: keypoints with score <= threshold are ignored
//     alphaColorToAdd: blending weight forwarded to addColorWeighted (defined elsewhere)
//     blendOriginalFrame: if false, drawing starts from a black pixel rather than from the current content
//     googlyEye1, googlyEye2: part indices rendered as googly eyes (-1 matches no part)
__inline__ __device__ void renderKeypoints(
    float* targetPtr, float* sharedMaxs, float* sharedMins, float* sharedScaleF, const float* const maxPtr,
    const float* const minPtr, const float* const scalePtr, const int globalIdx, const int x, const int y,
    const unsigned int targetWidth, const unsigned int targetHeight, const float* const keypointsPtr,
    const unsigned int* const partPairsPtr, const int numberPeople, const int numberParts,
    const int numberPartPairs, const float* const rgbColorsPtr, const int numberColors, const float radius,
    const float lineWidth, const float* const keypointScalePtr, const int numberScales, const float threshold,
    const float alphaColorToAdd, const bool blendOriginalFrame = true, const int googlyEye1 = -1,
    const int googlyEye2 = -1)
{
    // Load shared memory: the first 2*numberPeople indexes copy the box limits, the first numberPeople
    // ones also copy the scales
    if (globalIdx < 2*numberPeople)
    {
        sharedMins[globalIdx] = minPtr[globalIdx];
        sharedMaxs[globalIdx] = maxPtr[globalIdx];
        if (globalIdx < numberPeople)
            sharedScaleF[globalIdx] = scalePtr[globalIdx];
    }
    __syncthreads();
    // Threads mapped outside the image only took part in the load above (the barrier is already behind)
    if (!(x < targetWidth && y < targetHeight))
        return;
    // Starting color: current pixel content, or black if the original frame must not be blended
    const unsigned long pixelIndex = 3u*(y * (unsigned long)targetWidth + x);
    float b = (blendOriginalFrame ? targetPtr[pixelIndex] : 0.f);
    float g = (blendOriginalFrame ? targetPtr[pixelIndex+1] : 0.f);
    float r = (blendOriginalFrame ? targetPtr[pixelIndex+2] : 0.f);
    const auto lineWidth2 = lineWidth * lineWidth;
    const auto radius2 = radius * radius;
    for (auto person = 0; person < numberPeople; person++)
    {
        // Skip people whose box does not contain this pixel. This also discards empty people: if all joints
        // are below threshold, then maxs = 0 and mins = width/height, so the test is false
        const auto boxX = person;
        const auto boxY = numberPeople+person;
        const bool insideBox = (sharedMins[boxX] <= x && x <= sharedMaxs[boxX]
                                && sharedMins[boxY] <= y && y <= sharedMaxs[boxY]);
        if (!insideBox)
            continue;
        const auto personScale2 = sharedScaleF[person] * sharedScaleF[person];
        // Part pair connections: each limb is an ellipse centered on the midpoint of its 2 keypoints
        for (auto pairIndex = 0; pairIndex < numberPartPairs; pairIndex++)
        {
            const auto partA = partPairsPtr[2*pairIndex];
            const auto partB = partPairsPtr[2*pairIndex+1];
            const auto baseA = person*numberParts*3 + partA*3;
            const auto baseB = person*numberParts*3 + partB*3;
            // Both ends of the limb are required
            if (!(keypointsPtr[baseA + 2] > threshold && keypointsPtr[baseB + 2] > threshold))
                continue;
            const auto xA = keypointsPtr[baseA];
            const auto yA = keypointsPtr[baseA + 1];
            const auto xB = keypointsPtr[baseB];
            const auto yB = keypointsPtr[baseB + 1];
            const auto partScale = keypointScalePtr[partB%numberScales];
            const auto lineWidthScaled = lineWidth2 * (partScale * partScale * partScale);
            // Squared semi-axes: bSqrt across the limb, aSqrt (half the limb length, squared) along it
            const auto bSqrt = personScale2 * lineWidthScaled;
            const auto xP = (xA + xB) / 2.f;
            const auto yP = (yA + yB) / 2.f;
            const auto aSqrt = (xA - xP) * (xA - xP) + (yA - yP) * (yA - yP);
            // Pixel offset from the midpoint, rotated into the limb frame
            const auto angle = atan2f(yB - yA, xB - xA);
            const auto sine = sinf(angle);
            const auto cosine = cosf(angle);
            const auto alongLimb = cosine * (x - xP) + sine * (y - yP);
            const auto acrossLimb = sine * (x - xP) - cosine * (y - yP);
            const auto judge = alongLimb * alongLimb / aSqrt + acrossLimb * acrossLimb / bSqrt;
            // Inside the ellipse (NaN/inf from degenerate limbs fail the test)
            if (0.f <= judge && judge <= 1.f)
                // Before used partPair vs partB
                addColorWeighted(r, g, b, &rgbColorsPtr[(partB%numberColors)*3], alphaColorToAdd);
        }
        // Part circles
        for (auto part = 0u; part < numberParts; part++)
        {
            const auto base = 3 * (person*numberParts + part);
            if (!(keypointsPtr[base + 2] > threshold))
                continue;
            const auto localX = keypointsPtr[base];
            const auto localY = keypointsPtr[base + 1];
            const auto partScale = keypointScalePtr[part%numberScales];
            const auto radiusScaled = radius2 * (partScale * partScale * partScale);
            const auto dist2 = (x - localX) * (x - localX) + (y - localY) * (y - localY);
            // Googly eyes
            if (googlyEye1 == part || googlyEye2 == part)
            {
                const auto eyeRatio = 2.5f * sqrtf(radiusScaled);
                const auto minr2 = personScale2 * (eyeRatio - 2) * (eyeRatio - 2);
                const auto maxr2 = personScale2 * eyeRatio * eyeRatio;
                if (dist2 <= maxr2)
                {
                    // Gray level of the eye at this pixel: dark outer ring, bright inside...
                    float shade = 0.f;
                    if (dist2 <= minr2)
                        shade = 255.f;
                    // ... and a dark center, except for a small bright spot shifted by (+4, -4)
                    if (dist2 <= minr2*0.6f)
                    {
                        const auto dist3 = (x-4 - localX)
                            * (x-4 - localX) + (y - localY+4) * (y - localY+4);
                        if (dist3 > 14.0625f) // 3.75f^2
                            shade = 0.f;
                    }
                    float colorToAdd [3] = {shade, shade, shade};
                    // Eyes use their own fixed weight rather than alphaColorToAdd
                    const auto eyeAlpha = 0.9f;
                    addColorWeighted(r, g, b, colorToAdd, eyeAlpha);
                }
            }
            // Other parts
            else
            {
                const auto maxr2 = personScale2 * radiusScaled;
                if (0.f <= dist2 && dist2 <= maxr2)
                    addColorWeighted(r, g, b, &rgbColorsPtr[(part%numberColors)*3], alphaColorToAdd);
            }
        }
    }
    targetPtr[pixelIndex] = b;
    targetPtr[pixelIndex+1] = g;
    targetPtr[pixelIndex+2] = r;
}
// Computes the per-person bounding boxes and scales into the shared buffers, and then draws the limbs and
// keypoint circles of every person onto the single pixel (x, y) owned by the calling thread.
// The function contains a __syncthreads(), so every thread of the block must call it.
// Parameters:
//     targetPtr: image with 3 interleaved floats per pixel (handled here as b, g, r), row-major with
//         targetWidth pixels per row; read (unless blendOriginalFrame is false) and written at (x, y)
//     sharedMaxs, sharedMins, sharedScaleF: scratch buffers with numberPeople elements each, filled here
//         (presumably __shared__ arrays owned by the calling kernel)
//     globalIdx: threads with globalIdx < numberPeople compute the box and scale of person globalIdx
//     keypointsPtr: 3 floats (x, y, score) per part, numberParts parts per person
//     partPairsPtr: 2 part indices per limb, numberPartPairs limbs
//     rgbColorsPtr: 3 floats per color, indexed by part modulo numberColors
//     keypointScalePtr: per-part scale, indexed by part modulo numberScales and applied cubed
//     radius, lineWidth: base circle radius and limb width (used squared)
//     threshold: keypoints with score <= threshold are ignored
//     alphaColorToAdd: blending weight forwarded to addColorWeighted (defined elsewhere)
//     blendOriginalFrame: if false, drawing starts from a black pixel rather than from the current content
//     googlyEye1, googlyEye2: part indices rendered as googly eyes (-1 matches no part)
__inline__ __device__ void renderKeypointsOld(
    float* targetPtr, float2* sharedMaxs, float2* sharedMins, float* sharedScaleF, const int globalIdx,
    const int x, const int y, const unsigned int targetWidth, const unsigned int targetHeight, const float* const keypointsPtr,
    const unsigned int* const partPairsPtr, const int numberPeople, const int numberParts,
    const int numberPartPairs, const float* const rgbColorsPtr, const int numberColors, const float radius,
    const float lineWidth, const float* const keypointScalePtr, const int numberScales, const float threshold,
    const float alphaColorToAdd, const bool blendOriginalFrame = true, const int googlyEye1 = -1,
    const int googlyEye2 = -1)
{
    // Fill shared parameters
    if (globalIdx < numberPeople)
    {
        float minValueX = (float)targetWidth;
        float minValueY = (float)targetHeight;
        float maxValueX = 0.f;
        float maxValueY = 0.f;
        // Explicit flag instead of comparing the maxima against 0: keypoints lying exactly on x == 0 or
        // y == 0 are still valid keypoints
        bool anyValidKeypoint = false;
        for (auto part = 0 ; part < numberParts ; part++)
        {
            const unsigned long index = 3u * (((unsigned long)globalIdx)*numberParts + part);
            const float score = keypointsPtr[index+2];
            if (score > threshold)
            {
                // Named keypointX/Y so that the pixel coordinates x/y are not shadowed
                const float keypointX = keypointsPtr[index];
                const float keypointY = keypointsPtr[index+1];
                anyValidKeypoint = true;
                minValueX = fminf(minValueX, keypointX);
                maxValueX = fmaxf(maxValueX, keypointX);
                minValueY = fminf(minValueY, keypointY);
                maxValueY = fmaxf(maxValueY, keypointY);
            }
        }
        // Always written, so that the rendering loop never reads uninitialized shared memory
        float personScale = 1.f;
        if (anyValidKeypoint)
        {
            const auto extentX = maxValueX - minValueX;
            const auto extentY = maxValueY - minValueY;
            // (extentX + extentY) / 400.f, i.e., the mean box side divided by 200.
            // fastTruncateCuda is defined elsewhere; presumably it clamps the value to [0.33, 1]
            personScale = fastTruncateCuda((extentX + extentY) / 400.f, 0.33f, 1.f);
            // Margin around the keypoints
            const auto constantToAdd = 50.f;
            maxValueX += constantToAdd;
            maxValueY += constantToAdd;
            minValueX -= constantToAdd;
            minValueY -= constantToAdd;
        }
        sharedScaleF[globalIdx] = personScale;
        sharedMins[globalIdx].x = minValueX;
        sharedMins[globalIdx].y = minValueY;
        sharedMaxs[globalIdx].x = maxValueX;
        sharedMaxs[globalIdx].y = maxValueY;
    }
    __syncthreads();
    // Threads mapped outside the image only took part in the fill above (the barrier is already behind)
    if (!(x < targetWidth && y < targetHeight))
        return;
    // Starting color: current pixel content, or black if the original frame must not be blended
    const unsigned long pixelIndex = 3u*(y * (unsigned long)targetWidth + x);
    float b = (blendOriginalFrame ? targetPtr[pixelIndex] : 0.f);
    float g = (blendOriginalFrame ? targetPtr[pixelIndex+1] : 0.f);
    float r = (blendOriginalFrame ? targetPtr[pixelIndex+2] : 0.f);
    const auto lineWidth2 = lineWidth * lineWidth;
    const auto radius2 = radius * radius;
    for (auto person = 0; person < numberPeople; person++)
    {
        // Skip people whose box does not contain this pixel. This also discards empty people: if all joints
        // are below threshold, then maxs = 0 and mins = width/height, so the test is false
        const bool insideBox = (sharedMins[person].x <= x && x <= sharedMaxs[person].x
                                && sharedMins[person].y <= y && y <= sharedMaxs[person].y);
        if (!insideBox)
            continue;
        const auto personScale2 = sharedScaleF[person] * sharedScaleF[person];
        // Part pair connections: each limb is an ellipse centered on the midpoint of its 2 keypoints
        for (auto pairIndex = 0; pairIndex < numberPartPairs; pairIndex++)
        {
            const auto partA = partPairsPtr[2*pairIndex];
            const auto partB = partPairsPtr[2*pairIndex+1];
            const auto baseA = person*numberParts*3 + partA*3;
            const auto baseB = person*numberParts*3 + partB*3;
            // Both ends of the limb are required
            if (!(keypointsPtr[baseA + 2] > threshold && keypointsPtr[baseB + 2] > threshold))
                continue;
            const auto xA = keypointsPtr[baseA];
            const auto yA = keypointsPtr[baseA + 1];
            const auto xB = keypointsPtr[baseB];
            const auto yB = keypointsPtr[baseB + 1];
            const auto partScale = keypointScalePtr[partB%numberScales];
            const auto lineWidthScaled = lineWidth2 * (partScale * partScale * partScale);
            // Squared semi-axes: bSqrt across the limb, aSqrt (half the limb length, squared) along it
            const auto bSqrt = personScale2 * lineWidthScaled;
            const auto xP = (xA + xB) / 2.f;
            const auto yP = (yA + yB) / 2.f;
            const auto aSqrt = (xA - xP) * (xA - xP) + (yA - yP) * (yA - yP);
            // Pixel offset from the midpoint, rotated into the limb frame
            const auto angle = atan2f(yB - yA, xB - xA);
            const auto sine = sinf(angle);
            const auto cosine = cosf(angle);
            const auto alongLimb = cosine * (x - xP) + sine * (y - yP);
            const auto acrossLimb = sine * (x - xP) - cosine * (y - yP);
            const auto judge = alongLimb * alongLimb / aSqrt + acrossLimb * acrossLimb / bSqrt;
            // Inside the ellipse (NaN/inf from degenerate limbs fail the test)
            if (0.f <= judge && judge <= 1.f)
                // Before used partPair vs partB
                addColorWeighted(r, g, b, &rgbColorsPtr[(partB%numberColors)*3], alphaColorToAdd);
        }
        // Part circles
        for (auto part = 0u; part < numberParts; part++)
        {
            const auto base = 3 * (person*numberParts + part);
            if (!(keypointsPtr[base + 2] > threshold))
                continue;
            const auto localX = keypointsPtr[base];
            const auto localY = keypointsPtr[base + 1];
            const auto partScale = keypointScalePtr[part%numberScales];
            const auto radiusScaled = radius2 * (partScale * partScale * partScale);
            const auto dist2 = (x - localX) * (x - localX) + (y - localY) * (y - localY);
            // Googly eyes
            if (googlyEye1 == part || googlyEye2 == part)
            {
                const auto eyeRatio = 2.5f * sqrtf(radiusScaled);
                const auto minr2 = personScale2 * (eyeRatio - 2) * (eyeRatio - 2);
                const auto maxr2 = personScale2 * eyeRatio * eyeRatio;
                if (dist2 <= maxr2)
                {
                    // Gray level of the eye at this pixel: dark outer ring, bright inside...
                    float shade = 0.f;
                    if (dist2 <= minr2)
                        shade = 255.f;
                    // ... and a dark center, except for a small bright spot shifted by (+4, -4)
                    if (dist2 <= minr2*0.6f)
                    {
                        const auto dist3 = (x-4 - localX)
                            * (x-4 - localX) + (y - localY+4) * (y - localY+4);
                        if (dist3 > 14.0625f) // 3.75f^2
                            shade = 0.f;
                    }
                    float colorToAdd [3] = {shade, shade, shade};
                    // Eyes use their own fixed weight rather than alphaColorToAdd
                    const auto eyeAlpha = 0.9f;
                    addColorWeighted(r, g, b, colorToAdd, eyeAlpha);
                }
            }
            // Other parts
            else
            {
                const auto maxr2 = personScale2 * radiusScaled;
                if (0.f <= dist2 && dist2 <= maxr2)
                    addColorWeighted(r, g, b, &rgbColorsPtr[(part%numberColors)*3], alphaColorToAdd);
            }
        }
    }
    targetPtr[pixelIndex] = b;
    targetPtr[pixelIndex+1] = g;
    targetPtr[pixelIndex+2] = r;
}
}
#endif // OPENPOSE_PRIVATE_UTILITIES_RENDER_HU