
Wolf Chase V10

夏洛爾 | 2023-02-23 15:28:51

Wolf Run V10

Wolf Run V10 (Curriculum)

Experiment goals:
1. After entering the standing-idle state, transition into the chase state; while chasing, the wolf must keep running until it is within close range of the target.
2. Body size ranges from 1x to 5x (a per-episode sampling sketch follows this list).
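A minimal sketch of how goal 2 might be sampled per episode. Only currentSize appears in the judge code below; ResetSize and the localScale line are assumed names and mechanics, not the original implementation:

// Hypothetical per-episode size reset: sample a scale in [1x, 5x] and apply it.
// The judge code scales its speed expectations by currentSize
// (7f*currentSize, 10f*currentSize), so larger wolves must run proportionally faster.
void ResetSize()
{
    currentSize = Random.Range(1f, 5f);              // uniform in [1x, 5x]
    wolfRoot.localScale = Vector3.one * currentSize; // assumed way of applying the scale
}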

Experiment design:
1. Any weak point touching the ground is a failure (the tail and the four lower legs are not weak points).
2. Non-weak-point limbs incur a small penalty scaled by their damage coefficient:

// Non-weak-point body parts with damage contribute a small penalty
// proportional to damageCoef, accumulated into clampReward.
if(wolfBodies[i].damageCoef > 0f)
{
    clampReward += -0.01f * wolfBodies[i].damageCoef;
}
3. Chase-state judge and reward logic:

//Set: judge.endEpisode = true
//Set: judge.episodeLength = 30f
//Set: useClampReward = true
//Set: SharpingBuffer Len=250 Th=-0.4
if(weaknessOnGround)
{
    if(inferenceMode)
    {
        brainMode = BrainMode.GetUp;
        SetModel("WolfGetUp", getUpBrain);
        behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
    else
    {
        // A weak point touched the ground: fail the episode.
        AddReward(-1f);
        judge.outLife++;
        judge.Reset();
        return;
    }
}
else if(wolfRoot.localPosition.y < -10f)
{
    if(inferenceMode)
    {
        brainMode = BrainMode.GetUp;
        SetModel("WolfGetUp", getUpBrain);
        behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
    else
    {
        // Fell out of the stage.
        AddReward(-1f);
        judge.outY++;
        judge.Reset();
        return;
    }
}
else
{
    // Direction and posture terms, each normalized to [0, 1] via InverseLerp.
    targetSmoothPosition = targetPositionBuffer.GetSmoothVal();
    headDir = targetSmoothPosition - stageBase.InverseTransformPoint(wolfHeadRb.position);
    rootDir = targetSmoothPosition - stageBase.InverseTransformPoint(wolfRootRb.position);
    flatTargetVelocity = rootDir;
    flatTargetVelocity.y = 0f;
    targetDistance = flatTargetVelocity.magnitude;
    Vector3 forwardDir = flatTargetVelocity.normalized;
    Vector3 flatLeftDir = Vector3.Cross(flatTargetVelocity, Vector3.up);
    lookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfHead.right * -1f, headDir));
    //SideUp
    upAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfHead.forward, flatLeftDir));
    aimVelocity = flatTargetVelocity.normalized;
    aimVelocity.y = 0.1f;
    spineUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfSpine.up * -1f, Vector3.up));
    //SideLook
    spineLookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfSpine.forward, flatLeftDir));
    rootUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRoot.up, Vector3.up));
    //SideLook
    rootLookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRoot.right * -1f, flatLeftDir));
    leftThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfLeftThigh.forward * -1f, flatLeftDir));
    rightThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRightThigh.forward * -1f, flatLeftDir));
    //For Sync run
    Vector3 leftThighUpDir = Vector3.ProjectOnPlane(wolfLeftThigh.right, flatLeftDir);
    Vector3 rightThighUpDir = Vector3.ProjectOnPlane(wolfRightThigh.right, flatLeftDir);
    float thighUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(leftThighUpDir, rightThighUpDir));
    leftUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfLeftUpperArm.forward * -1f, flatLeftDir));
    rightUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRightUpperArm.forward * -1f, flatLeftDir));
    //For Sync run
    Vector3 leftUpperArmUpDir = Vector3.ProjectOnPlane(wolfLeftUpperArm.right, flatLeftDir);
    Vector3 rightUpperArmUpDir = Vector3.ProjectOnPlane(wolfRightUpperArm.right, flatLeftDir);
    float upperArmUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(leftUpperArmUpDir, rightUpperArmUpDir));
    tailAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfTail.right, flatTargetVelocity));

    // Smoothed velocity terms.
    avgVelocity = velocityBuffer.GetSmoothVal();
    velocityAngle = Vector3.Angle(avgVelocity, aimVelocity);
    velocityAngleCoef = Mathf.InverseLerp(180f, 0f, velocityAngle);
    flatVelocity = avgVelocity;
    flatVelocity.y = 0f;
    flatVelocityManitude = flatVelocity.magnitude;
    velocityCoef = Mathf.InverseLerp(0f, 10f * currentSize, Vector3.Project(avgVelocity, aimVelocity).magnitude);
    flatVelocityAngle = Vector3.Angle(flatVelocity, flatTargetVelocity);

    if(!inferenceMode)
    {
        if(targetDistance > nearModeRange)
        {
            if(Time.fixedTime - landingMoment > landingBufferTime)
            {
                // Force Sharping: thresholds on speed, direction and motion
                // that ramp up over time after landing.
                bool outSpeed = flatVelocityManitude < Mathf.Lerp(0f, 7f * currentSize, (Time.fixedTime - landingMoment - landingBufferTime) / 7f);
                bool outDirection = flatVelocityAngle > Mathf.Lerp(180f, 10f, (Time.fixedTime - landingMoment - landingBufferTime) / 5f);
                // float motionLimit = Mathf.Lerp(0.0f, 0.7f, (Time.fixedTime - landingMoment - landingBufferTime)/5f);
                // float motionLimit3 = Mathf.Lerp(0.0f, 0.8f, (Time.fixedTime - landingMoment - landingBufferTime)/5f);
                float motionLimit2 = Mathf.Lerp(0f, 0.7f, (Time.fixedTime - landingMoment - landingBufferTime) / 5f);
                float sharpingResetVal = Mathf.Lerp(0f, sharpingResetThreshould, (Time.fixedTime - landingMoment - landingBufferTime - 2f) / 5f);
                // bool outMotion = lookAngle < motionLimit
                //     || upAngle < motionLimit
                //     || leftThighAngle < motionLimit2
                //     || rightThighAngle < motionLimit2
                //     || spineLookAngle < motionLimit
                //     || rootLookAngle < motionLimit
                //     || spineUpAngle < motionLimit3
                //     || rootUpAngle < motionLimit3
                //     || thighUpAngle < motionLimit2
                //     || upperArmUpAngle < motionLimit2
                //     || leftUpperArmAngle < motionLimit2
                //     || rightUpperArmAngle < motionLimit2;
                bool outMotion = thighUpAngle < motionLimit2 || upperArmUpAngle < motionLimit2;
                if(outSpeed || outDirection || outMotion)
                {
                    // AddReward(-1f);
                    if(outSpeed)
                    {
#if UNITY_EDITOR
                        Debug.Log("outSpeed");
#endif
                        // clampReward += -0.1f;
                        judge.outSpeed++;
                    }
                    if(outDirection)
                    {
#if UNITY_EDITOR
                        Debug.Log("outDirection");
#endif
                        // clampReward += -0.1f;
                        judge.outDirection++;
                    }
                    if(outMotion)
                    {
#if UNITY_EDITOR
                        Debug.Log("outMotion");
#endif
                        // clampReward += -0.1f;
                        judge.outMotion++;
                    }
                    sharpingBuffer.PushVal(-1f);
                    // judge.Reset();
                    // return;
                }
                else
                {
                    sharpingBuffer.PushVal(0f);
                }
#if UNITY_EDITOR
                sharpingVal = sharpingBuffer.GetSmoothVal();
#endif
                // Reset once the fault average sinks below the ramping threshold.
                if(sharpingBuffer.GetSmoothVal() < sharpingResetVal)
                {
                    AddReward(-0.7f);
                    judge.Reset();
                    return;
                }
            }
            if(useStep)
            {
                if(IsOverSteps())
                {
                    // AddReward(-0.5f);
                    judge.outY++;
                    judge.Reset();
                    return;
                }
                else
                {
                    AverageSteps(0.3f * Time.fixedDeltaTime);
                }
            }
            else
            {
                useStep = true;
                ResetSteps();
            }
            // No score while falling.
            bool isFalling = avgVelocity.y < 0f;
            // bool isFalling = false;
            if(isFalling)
            {
                lastReward = 0f;
            }
            else
            {
                /*
                lastReward = velocityCoef * 0.1f + velocityAngleCoef * 0.02f
                    + (lookAngle + upAngle) * 0.01f
                    + (leftThighAngle + rightThighAngle + leftUpperArmAngle + rightUpperArmAngle) * 0.0025f
                    + (spineLookAngle + rootLookAngle + spineUpAngle + rootUpAngle) * 0.005f
                    + (tailAngle) * 0.005f
                    + (thighUpAngle + upperArmUpAngle) * 0.02f
                    + (1f - exertionRatio) * 0.005f;
                */
                lastReward = velocityCoef * 0.1f + velocityAngleCoef * 0.02f
                    + (thighUpAngle + upperArmUpAngle) * 0.02f;
            }
            if(useClampReward)
            {
                lastReward = lastReward + clampReward;
                if(lastReward < 0f) lastReward = 0f;
            }
            totalReward += lastReward;
            AddReward(lastReward);
        }
        // else if(targetDistance > 1.5f)
        else
        {
            // Close enough to the target: the episode counts as survived.
            // SetReward(1f);
            judge.survived++;
            judge.Reset();
            return;
        }
    }
}
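The SharpingBuffer itself (Len=250, Th=-0.4, per the //Set comments) is not shown in the post. A minimal sketch consistent with the PushVal/GetSmoothVal usage above, assuming a plain fixed-length ring buffer whose smoothed value is the mean of its contents:

// Hypothetical reconstruction, not the original class: every faulty frame
// pushes -1 and every clean frame pushes 0, so the mean drifts toward -1
// while faults persist; once it crosses the threshold (-0.4), the judge
// resets the episode with a penalty.
public class SharpingBuffer
{
    private readonly float[] values;
    private int index;

    public SharpingBuffer(int length) // e.g. Len = 250
    {
        values = new float[length];
    }

    public void PushVal(float v)
    {
        values[index] = v;
        index = (index + 1) % values.Length;
    }

    public float GetSmoothVal()
    {
        float sum = 0f;
        for(int i = 0; i < values.Length; i++) sum += values[i];
        return sum / values.Length;
    }
}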

// Roughly speaking:
Reward speed
Reward velocity direction
Reward synchronized legs
No score while falling
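Since InverseLerp clamps every angle term to [0, 1], the per-step reward caps out around 0.1 (speed) + 0.02 (direction) + (1 + 1) × 0.02 (thigh/upper-arm sync) = 0.16, so the speed term dominates the signal.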

Training time:
Step: 5e8
Time Elapsed:
--Normal: 197733s (54.92hr)
--Curriculum: 176858s (49.13hr)

Experiment results:
The result was a failure (Normal) and a partial success (Curriculum).

The Normal group still runs extremely slowly, and its legs never synchronized.
The Curriculum group was trained with no changes at all, yet performed far better than the Normal group.

So the surprising finding is that pressuring the ML through a curriculum may actually improve performance.
This shows in the score curves especially: each time the curriculum enters a new lesson, the score drops immediately, but the growth rate afterwards climbs.
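The post does not show how the lessons are wired up. Assuming this uses Unity ML-Agents' built-in curriculum (environment parameters that step to new values when a lesson's completion criteria are met), a minimal sketch of reading a lesson-driven value at episode start; "wolf_size_max" is an assumed parameter name, not from the original project:

using Unity.MLAgents;
using UnityEngine;

// Hypothetical: inside the wolf's Agent subclass, pull the current lesson's
// value each episode. When a lesson boundary raises the parameter, the task
// gets harder, which matches the observed "score drops, growth rises" pattern.
public override void OnEpisodeBegin()
{
    float sizeMax = Academy.Instance.EnvironmentParameters.GetWithDefault("wolf_size_max", 5f);
    currentSize = Random.Range(1f, sizeMax);
}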

Conversely, the original 2-second landingBufferTime may have been too long: by the time it expired the wolf had already settled into a gait, which degraded the effect of the subsequent Force Sharping.

On further reflection, continuity of the reward signal may matter a great deal to ML,
so "no score while falling" is probably a bad design.

So, for the next experiment (all three changes are sketched after this list):

1. Squeeze landingBufferTime
starting from 1.2s and going down to 0.5s

2. Drop "no score while falling"
and at the same time raise the upward angle of the target velocity

3. Equalize the Force Sharping penalty
Originally the Force Sharping penalty was slightly smaller than the ground-contact one, hoping to encourage the wolf to prefer motion faults over hitting the ground (running badly is fine, just don't fall first).
But this may instead teach the wolf to keep committing faults rather than take any risk of hitting the ground (it would rather not run at all).
So adjust it back to an equal penalty.
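A rough sketch of what these changes might look like against the V10 judge code above; the lesson-annealing mechanism and the new aimVelocity.y value are assumptions, not decisions stated in the post:

// 1. Hypothetical: press landingBufferTime from 1.2s down toward 0.5s
//    (V10 used a fixed 2s); lessonProgress in [0, 1] is an assumed knob.
landingBufferTime = Mathf.Lerp(1.2f, 0.5f, lessonProgress);

// 2. Raise the upward angle of the target velocity (V10 used y = 0.1f;
//    the exact new value is not given in the post) and drop the isFalling gate
//    so the reward stays continuous through the airborne phase.
aimVelocity = flatTargetVelocity.normalized;
aimVelocity.y = 0.3f; // assumed value, raised from 0.1f
lastReward = velocityCoef * 0.1f + velocityAngleCoef * 0.02f
    + (thighUpAngle + upperArmUpAngle) * 0.02f; // no isFalling branch

// 3. Equalize the Force Sharping reset penalty with the -1f ground-contact penalty.
if(sharpingBuffer.GetSmoothVal() < sharpingResetVal)
{
    AddReward(-1f); // was -0.7f
    judge.Reset();
    return;
}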

Alright, once again I am optimistic about the next experiment.
