
Wolf Chase v4

夏洛爾 | 2023-01-18 22:53:24


Wolf Run V4

Experiment goals:
1. After entering the standing-idle state, transition into the chase state; while chasing, the wolf must be able to keep running until it is within close range of the target.
2. Body size ranges from 1x to 5x.
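The idle-to-chase hand-off itself is not shown in the excerpt below; as a minimal sketch, it could reuse the same SetModel/BehaviorType pattern the code already uses for the get-up brain. BrainMode.Run, "WolfRun" and runBrain are assumed placeholder names, not from the original:

void EnterChaseMode()
{
    // Hypothetical: swap in the chase policy the same way the code below swaps in "WolfGetUp".
    brainMode = BrainMode.Run;                                    // assumed enum value
    SetModel("WolfRun", runBrain);                                // ML-Agents Agent.SetModel(behaviorName, model)
    behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}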

Experiment design:
1. Touching the ground with any weak point fails the episode (the tail and the four lower legs are not weak points).
2. Non-weak-point body parts are penalized in proportion to their damageCoef:
if (wolfBodies[i].damageCoef > 0f) { clampReward += -0.01f * wolfBodies[i].damageCoef; }
3. Episode settings:
//Set: judge.endEpisode = true
//Set: judge.episodeLength = 15f
//Set: useClampReward = true
//Set: SharpingBuffer Len=250 Th=-0.4

// Episode judgment: fail on weak-point ground contact or falling off the stage;
// otherwise compute posture/velocity terms and the per-step reward.
if (weaknessOnGround)
{
    if (inferenceMode)
    {
        // In inference, hand control to the get-up policy instead of resetting.
        brainMode = BrainMode.GetUp;
        SetModel("WolfGetUp", getUpBrain);
        behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
    else
    {
        AddReward(-1f);
        judge.outLife++;
        judge.Reset();
        return;
    }
}
else if (wolfRoot.localPosition.y < -10f)
{
    if (inferenceMode)
    {
        brainMode = BrainMode.GetUp;
        SetModel("WolfGetUp", getUpBrain);
        behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
    else
    {
        AddReward(-1f);
        judge.outY++;
        judge.Reset();
        return;
    }
}
else
{
    targetSmoothPosition = targetPositionBuffer.GetSmoothVal();
    headDir = targetSmoothPosition - stageBase.InverseTransformPoint(wolfHeadRb.position);
    rootDir = targetSmoothPosition - stageBase.InverseTransformPoint(wolfRootRb.position);

    flatTargetVelocity = rootDir;
    flatTargetVelocity.y = 0f;
    targetDistance = flatTargetVelocity.magnitude;
    Vector3 forwardDir = flatTargetVelocity.normalized;
    Vector3 flatLeftDir = Vector3.Cross(flatTargetVelocity, Vector3.up);
    lookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfHead.right * -1f, headDir));
    //SideUp
    upAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfHead.forward, flatLeftDir));
    aimVelocity = flatTargetVelocity.normalized;
    aimVelocity.y = 0.2f;
    spineUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfSpine.up * -1f, Vector3.up));
    //SideLook
    spineLookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfSpine.forward, flatLeftDir));
    rootUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRoot.up, Vector3.up));
    //SideLook
    rootLookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRoot.right * -1f, flatLeftDir));
    leftThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfLeftThigh.forward * -1f, flatLeftDir));
    rightThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRightThigh.forward * -1f, flatLeftDir));
    //For Sync run
    Vector3 leftThighUpDir = Vector3.ProjectOnPlane(wolfLeftThigh.right, flatLeftDir);
    Vector3 rightThighUpDir = Vector3.ProjectOnPlane(wolfRightThigh.right, flatLeftDir);
    float thighUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(leftThighUpDir, rightThighUpDir));
    leftUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfLeftUpperArm.forward * -1f, flatLeftDir));
    rightUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRightUpperArm.forward * -1f, flatLeftDir));
    //For Sync run
    Vector3 leftUpperArmUpDir = Vector3.ProjectOnPlane(wolfLeftUpperArm.right, flatLeftDir);
    Vector3 rightUpperArmUpDir = Vector3.ProjectOnPlane(wolfRightUpperArm.right, flatLeftDir);
    float upperArmUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(leftUpperArmUpDir, rightUpperArmUpDir));
    tailAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfTail.right, flatTargetVelocity));
    avgVelocity = velocityBuffer.GetSmoothVal();
    velocityAngle = Vector3.Angle(avgVelocity, aimVelocity);
    velocityAngleCoef = Mathf.InverseLerp(180f, 0f, velocityAngle);
    flatVelocity = avgVelocity;
    flatVelocity.y = 0f;
    flatVelocityManitude = flatVelocity.magnitude;
    velocityCoef = Mathf.InverseLerp(0f, 15f * currentSize, Vector3.Project(avgVelocity, aimVelocity).magnitude);
    flatVelocityAngle = Vector3.Angle(flatVelocity, flatTargetVelocity);
    if (!inferenceMode)
    {
        if (targetDistance > nearModeRange)
        {
            // Force Sharping: once the landing buffer expires, the speed, direction
            // and posture requirements tighten over ~5 seconds.
            if (Time.fixedTime - landingMoment > landingBufferTime)
            {
                bool outSpeed = flatVelocityManitude < Mathf.Lerp(0f, 7f * currentSize, (Time.fixedTime - landingMoment - landingBufferTime) / 5f);
                bool outDirection = flatVelocityAngle > Mathf.Lerp(180f, 10f, (Time.fixedTime - landingMoment - landingBufferTime) / 5f);
                float motionLimit = Mathf.Lerp(0.5f, 0.8f, (Time.fixedTime - landingMoment - landingBufferTime) / 5f);
                float motionLimit3 = Mathf.Lerp(0.5f, 0.8f, (Time.fixedTime - landingMoment - landingBufferTime) / 5f);
                float motionLimit2 = Mathf.Lerp(0f, 0.7f, (Time.fixedTime - landingMoment - landingBufferTime) / 5f);
                // Reverse-sharpened tolerance: the reset threshold loosens from 0 toward
                // sharpingResetThreshould, starting 2 s after Force Sharping begins.
                float sharpingResetVal = Mathf.Lerp(0f, sharpingResetThreshould, (Time.fixedTime - landingMoment - landingBufferTime - 2f) / 5f);
                bool outMotion = lookAngle < motionLimit
                    || upAngle < motionLimit
                    || leftThighAngle < motionLimit2
                    || rightThighAngle < motionLimit2
                    || spineLookAngle < motionLimit
                    || rootLookAngle < motionLimit
                    // || spineUpAngle < motionLimit3
                    // || rootUpAngle < motionLimit3
                    || thighUpAngle < motionLimit2
                    || upperArmUpAngle < motionLimit2
                    || leftUpperArmAngle < motionLimit2
                    || rightUpperArmAngle < motionLimit2;
                if (outSpeed || outDirection || outMotion)
                {
                    // AddReward(-1f);
                    if (outSpeed)
                    {
#if UNITY_EDITOR
                        Debug.Log("outSpeed");
#endif
                        clampReward += -0.1f;
                        judge.outSpeed++;
                    }
                    if (outDirection)
                    {
#if UNITY_EDITOR
                        Debug.Log("outDirection");
#endif
                        clampReward += -0.1f;
                        judge.outDirection++;
                    }
                    if (outMotion)
                    {
#if UNITY_EDITOR
                        Debug.Log("outMotion");
#endif
                        clampReward += -0.1f;
                        judge.outMotion++;
                    }
                    // A failed frame no longer resets immediately; it is pushed into the buffer.
                    sharpingBuffer.PushVal(-1f);
                    // judge.Reset();
                    // return;
                }
                else
                {
                    sharpingBuffer.PushVal(0f);
                }
#if UNITY_EDITOR
                sharpingVal = sharpingBuffer.GetSmoothVal();
#endif
                // Too many failed frames inside the window: force a reset.
                if (sharpingBuffer.GetSmoothVal() < sharpingResetVal)
                {
                    AddReward(-1f);
                    judge.Reset();
                    return;
                }
            }
            if (IsOverSteps())
            {
                judge.Reset();
                return;
            }
            lastReward = (velocityAngleCoef + velocityCoef) * 0.02f
                + (lookAngle + upAngle) * 0.01f + (leftThighAngle + rightThighAngle + leftUpperArmAngle + rightUpperArmAngle) * 0.0025f
                + (spineLookAngle + rootLookAngle + spineUpAngle + rootUpAngle) * 0.005f
                + (tailAngle) * 0.005f
                + (thighUpAngle + upperArmUpAngle) * 0.005f
                + (1f - exertionRatio) * 0.005f;
            if (useClampReward)
            {
                lastReward = lastReward + clampReward;
                if (lastReward < -0.5f) lastReward = -0.5f;
            }
            totalReward += lastReward;
            AddReward(lastReward);
        }
        // else if(targetDistance > 1.5f)
        else
        {
            // Close enough to the target: count as survived and end the episode.
            // AddReward(1f);
            judge.survived++;
            judge.Reset();
            return;
        }
    }
}
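SharpingBuffer itself is not included in the excerpt; only PushVal and GetSmoothVal appear above. A minimal sketch of what it plausibly is, assuming a fixed-length ring buffer whose smoothed value is the running average of the last Len samples (the internals here are guesses):

// Hypothetical reconstruction of SharpingBuffer.
public class SharpingBuffer
{
    private readonly float[] vals;
    private int index;
    private int count;
    private float sum;

    public SharpingBuffer(int length)
    {
        vals = new float[length];
    }

    // One sample per FixedUpdate: -1 on a failed frame, 0 on a clean one.
    public void PushVal(float v)
    {
        if (count == vals.Length) sum -= vals[index]; else count++;
        vals[index] = v;
        sum += v;
        index = (index + 1) % vals.Length;
    }

    // Average over the window; compared against the reverse-sharpened reset threshold.
    public float GetSmoothVal()
    {
        return count == 0 ? 0f : sum / count;
    }
}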


// Roughly speaking:
--1. Reward looking at the target, with Force Sharping applied
--2. Reward the speed and angle of the velocity projected onto the "recommended running vector", with Force Sharping applied
--3. Reward the Side Look of the four thighs, with Force Sharping applied
--4. Reward the tail matching the specified angle
--5. Reward reduced action change (lower exertionRatio)
--6. Reward the forelimbs and hindlimbs running in sync
--7. Motion-related Force Sharping does not start from zero
--8. Guide the body to stay as parallel to the ground as possible
--9. The speed requirement scales with body size
--10. Raise the Force Sharping requirements, especially during the ramp-in phase

4. Force Sharping now has a tolerance margin, but the tolerance itself is sharpened in reverse.
The character is allowed a cumulative total of up to 2 seconds of failure within any 5-second window, the hope being that even a slightly unbalanced character will try to recover on its own instead of being reset outright.
However, the tolerance is reverse-sharpened: the standard only begins to loosen gradually two seconds after Force Sharping starts.
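For the record, the window arithmetic (assuming Unity's default fixed timestep of 0.02 s, which the post does not state): Len=250 samples x 0.02 s gives a 5-second window; failed frames push -1 and clean frames push 0, so a smoothed value of -0.4 corresponds to 100 failed samples, i.e. exactly 2 seconds of failure within 5 seconds. That is how the Th=-0.4 setting above encodes this tolerance.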

5. The four feet must take turns touching the ground (one plausible implementation is sketched below).
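The excerpt does not show how this rule is enforced. One plausible reading is that each foot must touch down at regular intervals, so a foot that stays airborne too long is penalized. In this sketch, footIsGrounded, airborneLimit and the penalty size are assumptions, not part of the original code:

// Hypothetical sketch of the take-turns footfall check.
float[] airTimer = new float[4];

void CheckFootfalls()
{
    for (int i = 0; i < 4; i++)
    {
        if (footIsGrounded[i])
        {
            airTimer[i] = 0f;                 // contact resets the clock
        }
        else
        {
            airTimer[i] += Time.fixedDeltaTime;
            if (airTimer[i] > airborneLimit)  // foot has stayed off the ground too long
                clampReward += -0.01f;        // same clamped-penalty channel as damageCoef above
        }
    }
}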

6. A single initialize-from pass, continuing training from a previous run's weights (a typical invocation is shown below).
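This presumably refers to the mlagents-learn --initialize-from option, which seeds a new run with the trained weights of an earlier run ID. The config filename and run IDs here are placeholders:

mlagents-learn wolf_config.yaml --run-id=WolfRunV4 --initialize-from=WolfRunV3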

Training time:
Step: 5e7
Time Elapsed: lost (initialize-from does not seem to display its data correctly in TensorBoard)

Experiment result:
The experiment failed, but the result is very informative.

The wolf actually gets quite close to an ideal quadruped gallop, but once it reaches high speed it would rather go airborne and crash than keep running.

This result came from one normal training run followed by one initialize-from run, but initialize-from does not appear to have improved either the behavior or the score.

So the next thing to consider is why the wolf is unwilling to keep running and prefers to fly and crash instead.
