
Wolf Chase v4

夏洛爾 | 2023-01-18 22:53:24


Wolf Run V4

Experiment goals:
1. After entering the standing-idle state, transition into the chase state; while chasing, the wolf must be able to keep running until it is within close range of the target.
2. Body size ranges from 1x to 5x.
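The idle-to-chase hand-off itself is not shown in the excerpt below; as a minimal sketch, it could reuse the same SetModel/BehaviorType pattern the code already uses for the get-up brain. BrainMode.Run, "WolfRun" and runBrain are assumed placeholder names, not from the original:

void EnterChaseMode()
{
    // Hypothetical: swap in the chase policy the same way the code below swaps in "WolfGetUp".
    brainMode = BrainMode.Run;                                    // assumed enum value
    SetModel("WolfRun", runBrain);                                // ML-Agents Agent.SetModel(behaviorName, model)
    behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}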

Experiment design:
1. Touching the ground with any weak point fails the episode (the tail and the four lower legs are not weak points).
2. Non-weak-point body parts are penalized in proportion to their damageCoef:
if (wolfBodies[i].damageCoef > 0f) { clampReward += -0.01f * wolfBodies[i].damageCoef; }
3. Episode settings:
//Set: judge.endEpisode = true
//Set: judge.episodeLength = 15f
//Set: useClampReward = true
//Set: SharpingBuffer Len=250 Th=-0.4

// Episode judgment: fail on weak-point ground contact or falling off the stage;
// otherwise compute posture/velocity terms and the per-step reward.
if (weaknessOnGround)
{
    if (inferenceMode)
    {
        // In inference, hand control to the get-up policy instead of resetting.
        brainMode = BrainMode.GetUp;
        SetModel("WolfGetUp", getUpBrain);
        behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
    else
    {
        AddReward(-1f);
        judge.outLife++;
        judge.Reset();
        return;
    }
}
else if (wolfRoot.localPosition.y < -10f)
{
    if (inferenceMode)
    {
        brainMode = BrainMode.GetUp;
        SetModel("WolfGetUp", getUpBrain);
        behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
    else
    {
        AddReward(-1f);
        judge.outY++;
        judge.Reset();
        return;
    }
}
else
{
    targetSmoothPosition = targetPositionBuffer.GetSmoothVal();
    headDir = targetSmoothPosition - stageBase.InverseTransformPoint(wolfHeadRb.position);
    rootDir = targetSmoothPosition - stageBase.InverseTransformPoint(wolfRootRb.position);

    flatTargetVelocity = rootDir;
    flatTargetVelocity.y = 0f;
    targetDistance = flatTargetVelocity.magnitude;
    Vector3 forwardDir = flatTargetVelocity.normalized;
    Vector3 flatLeftDir = Vector3.Cross(flatTargetVelocity, Vector3.up);
    lookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfHead.right * -1f, headDir));
    //SideUp
    upAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfHead.forward, flatLeftDir));
    aimVelocity = flatTargetVelocity.normalized;
    aimVelocity.y = 0.2f;
    spineUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfSpine.up * -1f, Vector3.up));
    //SideLook
    spineLookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfSpine.forward, flatLeftDir));
    rootUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRoot.up, Vector3.up));
    //SideLook
    rootLookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRoot.right * -1f, flatLeftDir));
    leftThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfLeftThigh.forward * -1f, flatLeftDir));
    rightThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRightThigh.forward * -1f, flatLeftDir));
    //For Sync run
    Vector3 leftThighUpDir = Vector3.ProjectOnPlane(wolfLeftThigh.right, flatLeftDir);
    Vector3 rightThighUpDir = Vector3.ProjectOnPlane(wolfRightThigh.right, flatLeftDir);
    float thighUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(leftThighUpDir, rightThighUpDir));
    leftUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfLeftUpperArm.forward * -1f, flatLeftDir));
    rightUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRightUpperArm.forward * -1f, flatLeftDir));
    //For Sync run
    Vector3 leftUpperArmUpDir = Vector3.ProjectOnPlane(wolfLeftUpperArm.right, flatLeftDir);
    Vector3 rightUpperArmUpDir = Vector3.ProjectOnPlane(wolfRightUpperArm.right, flatLeftDir);
    float upperArmUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(leftUpperArmUpDir, rightUpperArmUpDir));
    tailAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfTail.right, flatTargetVelocity));
    avgVelocity = velocityBuffer.GetSmoothVal();
    velocityAngle = Vector3.Angle(avgVelocity, aimVelocity);
    velocityAngleCoef = Mathf.InverseLerp(180f, 0f, velocityAngle);
    flatVelocity = avgVelocity;
    flatVelocity.y = 0f;
    flatVelocityManitude = flatVelocity.magnitude;
    velocityCoef = Mathf.InverseLerp(0f, 15f * currentSize, Vector3.Project(avgVelocity, aimVelocity).magnitude);
    flatVelocityAngle = Vector3.Angle(flatVelocity, flatTargetVelocity);
    if (!inferenceMode)
    {
        if (targetDistance > nearModeRange)
        {
            // Force Sharping: once the landing buffer expires, the speed, direction
            // and posture requirements tighten over ~5 seconds.
            if (Time.fixedTime - landingMoment > landingBufferTime)
            {
                bool outSpeed = flatVelocityManitude < Mathf.Lerp(0f, 7f * currentSize, (Time.fixedTime - landingMoment - landingBufferTime) / 5f);
                bool outDirection = flatVelocityAngle > Mathf.Lerp(180f, 10f, (Time.fixedTime - landingMoment - landingBufferTime) / 5f);
                float motionLimit = Mathf.Lerp(0.5f, 0.8f, (Time.fixedTime - landingMoment - landingBufferTime) / 5f);
                float motionLimit3 = Mathf.Lerp(0.5f, 0.8f, (Time.fixedTime - landingMoment - landingBufferTime) / 5f);
                float motionLimit2 = Mathf.Lerp(0f, 0.7f, (Time.fixedTime - landingMoment - landingBufferTime) / 5f);
                // Reverse-sharpened tolerance: the reset threshold loosens from 0 toward
                // sharpingResetThreshould, starting 2 s after Force Sharping begins.
                float sharpingResetVal = Mathf.Lerp(0f, sharpingResetThreshould, (Time.fixedTime - landingMoment - landingBufferTime - 2f) / 5f);
                bool outMotion = lookAngle < motionLimit
                    || upAngle < motionLimit
                    || leftThighAngle < motionLimit2
                    || rightThighAngle < motionLimit2
                    || spineLookAngle < motionLimit
                    || rootLookAngle < motionLimit
                    // || spineUpAngle < motionLimit3
                    // || rootUpAngle < motionLimit3
                    || thighUpAngle < motionLimit2
                    || upperArmUpAngle < motionLimit2
                    || leftUpperArmAngle < motionLimit2
                    || rightUpperArmAngle < motionLimit2;
                if (outSpeed || outDirection || outMotion)
                {
                    // AddReward(-1f);
                    if (outSpeed)
                    {
#if UNITY_EDITOR
                        Debug.Log("outSpeed");
#endif
                        clampReward += -0.1f;
                        judge.outSpeed++;
                    }
                    if (outDirection)
                    {
#if UNITY_EDITOR
                        Debug.Log("outDirection");
#endif
                        clampReward += -0.1f;
                        judge.outDirection++;
                    }
                    if (outMotion)
                    {
#if UNITY_EDITOR
                        Debug.Log("outMotion");
#endif
                        clampReward += -0.1f;
                        judge.outMotion++;
                    }
                    // A failed frame no longer resets immediately; it is pushed into the buffer.
                    sharpingBuffer.PushVal(-1f);
                    // judge.Reset();
                    // return;
                }
                else
                {
                    sharpingBuffer.PushVal(0f);
                }
#if UNITY_EDITOR
                sharpingVal = sharpingBuffer.GetSmoothVal();
#endif
                // Too many failed frames inside the window: force a reset.
                if (sharpingBuffer.GetSmoothVal() < sharpingResetVal)
                {
                    AddReward(-1f);
                    judge.Reset();
                    return;
                }
            }
            if (IsOverSteps())
            {
                judge.Reset();
                return;
            }
            lastReward = (velocityAngleCoef + velocityCoef) * 0.02f
                + (lookAngle + upAngle) * 0.01f + (leftThighAngle + rightThighAngle + leftUpperArmAngle + rightUpperArmAngle) * 0.0025f
                + (spineLookAngle + rootLookAngle + spineUpAngle + rootUpAngle) * 0.005f
                + (tailAngle) * 0.005f
                + (thighUpAngle + upperArmUpAngle) * 0.005f
                + (1f - exertionRatio) * 0.005f;
            if (useClampReward)
            {
                lastReward = lastReward + clampReward;
                if (lastReward < -0.5f) lastReward = -0.5f;
            }
            totalReward += lastReward;
            AddReward(lastReward);
        }
        // else if(targetDistance > 1.5f)
        else
        {
            // Close enough to the target: count as survived and end the episode.
            // AddReward(1f);
            judge.survived++;
            judge.Reset();
            return;
        }
    }
}
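SharpingBuffer itself is not included in the excerpt; only PushVal and GetSmoothVal appear above. A minimal sketch of what it plausibly is, assuming a fixed-length ring buffer whose smoothed value is the running average of the last Len samples (the internals here are guesses):

// Hypothetical reconstruction of SharpingBuffer.
public class SharpingBuffer
{
    private readonly float[] vals;
    private int index;
    private int count;
    private float sum;

    public SharpingBuffer(int length)
    {
        vals = new float[length];
    }

    // One sample per FixedUpdate: -1 on a failed frame, 0 on a clean one.
    public void PushVal(float v)
    {
        if (count == vals.Length) sum -= vals[index]; else count++;
        vals[index] = v;
        sum += v;
        index = (index + 1) % vals.Length;
    }

    // Average over the window; compared against the reverse-sharpened reset threshold.
    public float GetSmoothVal()
    {
        return count == 0 ? 0f : sum / count;
    }
}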


// Roughly speaking:
--1. Reward looking at the target, with Force Sharping applied
--2. Reward the speed and angle of the velocity projected onto the "recommended running vector", with Force Sharping applied
--3. Reward the Side Look of the four thighs, with Force Sharping applied
--4. Reward the tail matching the specified angle
--5. Reward reduced action change (lower exertionRatio)
--6. Reward the forelimbs and hindlimbs running in sync
--7. Motion-related Force Sharping does not start from zero
--8. Guide the body to stay as parallel to the ground as possible
--9. The speed requirement scales with body size
--10. Raise the Force Sharping requirements, especially during the ramp-in phase

4. Force Sharping now has a tolerance margin, but the tolerance itself is sharpened in reverse.
The character is allowed a cumulative total of up to 2 seconds of failure within any 5-second window, the hope being that even a slightly unbalanced character will try to recover on its own instead of being reset outright.
However, the tolerance is reverse-sharpened: the standard only begins to loosen gradually two seconds after Force Sharping starts.
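For the record, the window arithmetic (assuming Unity's default fixed timestep of 0.02 s, which the post does not state): Len=250 samples x 0.02 s gives a 5-second window; failed frames push -1 and clean frames push 0, so a smoothed value of -0.4 corresponds to 100 failed samples, i.e. exactly 2 seconds of failure within 5 seconds. That is how the Th=-0.4 setting above encodes this tolerance.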

5. The four feet must take turns touching the ground (one plausible implementation is sketched below).
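The excerpt does not show how this rule is enforced. One plausible reading is that each foot must touch down at regular intervals, so a foot that stays airborne too long is penalized. In this sketch, footIsGrounded, airborneLimit and the penalty size are assumptions, not part of the original code:

// Hypothetical sketch of the take-turns footfall check.
float[] airTimer = new float[4];

void CheckFootfalls()
{
    for (int i = 0; i < 4; i++)
    {
        if (footIsGrounded[i])
        {
            airTimer[i] = 0f;                 // contact resets the clock
        }
        else
        {
            airTimer[i] += Time.fixedDeltaTime;
            if (airTimer[i] > airborneLimit)  // foot has stayed off the ground too long
                clampReward += -0.01f;        // same clamped-penalty channel as damageCoef above
        }
    }
}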

6. A single initialize-from pass, continuing training from a previous run's weights (a typical invocation is shown below).
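This presumably refers to the mlagents-learn --initialize-from option, which seeds a new run with the trained weights of an earlier run ID. The config filename and run IDs here are placeholders:

mlagents-learn wolf_config.yaml --run-id=WolfRunV4 --initialize-from=WolfRunV3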

Training time:
Step: 5e7
Time Elapsed: lost (initialize-from does not seem to display its data correctly in TensorBoard)

Experiment result:
The experiment failed, but the result is very informative.

The wolf actually gets quite close to an ideal quadruped gallop, but once it reaches high speed it would rather go airborne and crash than keep running.

This result came from one normal training run followed by one initialize-from run, but initialize-from does not appear to have improved either the behavior or the score.

So the next thing to consider is why the wolf is unwilling to keep running and prefers to fly and crash instead.
