主題

狼追逐 v3

夏洛爾 | 2022-12-27 15:39:02 | 巴幣 2 | 人氣 216

Wolf Run V3

實驗目標:

1.進入靜立狀態後，進入追逐狀態，在追逐狀態下，要能持續跑至接近目標的距離內

2.尺寸介於1-5倍

實驗設計:

1.任何弱點觸地皆失敗 (尾巴和四個小腿並非是弱點)

2.非弱點肢體

// Body parts with a positive damage coefficient reduce the clamp reward
// by 0.01 times that coefficient.
if (wolfBodies[i].damageCoef > 0f)
{
    clampReward -= 0.01f * wolfBodies[i].damageCoef;
}

3.

// Per-step termination checks and reward computation for the chase state.
// This is a fragment of a method body: it early-returns after judge.Reset()
// on every episode-ending condition.
//
// Experiment settings used with this code:
//Set: judge.endEpisode = true
//Set: judge.episodeLength = 15f
//Set: useClampReward = true
//Set: SharpingBuffer Len=250 Th=-0.4
if (weaknessOnGround)
{
    if (inferenceMode)
    {
        // In inference, hand control over to the get-up model instead of ending the episode.
        brainMode = BrainMode.GetUp;
        SetModel("WolfGetUp", getUpBrain);
        behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
    else
    {
        AddReward(-1f);
        judge.outLife++;
        judge.Reset();
        return;
    }
}
else if (wolfRoot.localPosition.y < -10f)
{
    // Root dropped below y = -10 in local space.
    if (inferenceMode)
    {
        brainMode = BrainMode.GetUp;
        SetModel("WolfGetUp", getUpBrain);
        behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
    else
    {
        AddReward(-1f);
        judge.outY++;
        judge.Reset();
        return;
    }
}
else
{
    // Target direction, measured from the head and from the root in stageBase local space.
    targetSmoothPosition = targetPositionBuffer.GetSmoothVal();
    headDir = targetSmoothPosition - stageBase.InverseTransformPoint(wolfHeadRb.position);
    rootDir = targetSmoothPosition - stageBase.InverseTransformPoint(wolfRootRb.position);

    // Horizontal (y = 0) vector from root to target; its length is the target distance.
    flatTargetVelocity = rootDir;
    flatTargetVelocity.y = 0f;
    targetDistance = flatTargetVelocity.magnitude;
    Vector3 forwardDir = flatTargetVelocity.normalized;
    Vector3 flatLeftDir = Vector3.Cross(flatTargetVelocity, Vector3.up);

    // Every *Angle value below is an alignment score in [0, 1]:
    // 1 when the two directions coincide (0 degrees), 0 when they are opposite (180 degrees).
    lookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfHead.right * -1f, headDir));
    //SideUp
    upAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfHead.forward, flatLeftDir));

    // Recommended running direction: horizontal unit vector toward the target with y set to 0.2.
    aimVelocity = forwardDir;
    aimVelocity.y = 0.2f;

    spineUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfSpine.up * -1f, Vector3.up));
    //SideLook
    spineLookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfSpine.forward, flatLeftDir));
    rootUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRoot.up, Vector3.up));
    //SideLook
    rootLookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRoot.right * -1f, flatLeftDir));

    leftThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfLeftThigh.forward * -1f, flatLeftDir));
    rightThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRightThigh.forward * -1f, flatLeftDir));
    //For Sync run
    // Compare both thighs after projecting onto the plane whose normal is flatLeftDir.
    Vector3 leftThighUpDir = Vector3.ProjectOnPlane(wolfLeftThigh.right, flatLeftDir);
    Vector3 rightThighUpDir = Vector3.ProjectOnPlane(wolfRightThigh.right, flatLeftDir);
    float thighUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(leftThighUpDir, rightThighUpDir));

    leftUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfLeftUpperArm.forward * -1f, flatLeftDir));
    rightUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRightUpperArm.forward * -1f, flatLeftDir));
    //For Sync run
    Vector3 leftUpperArmUpDir = Vector3.ProjectOnPlane(wolfLeftUpperArm.right, flatLeftDir);
    Vector3 rightUpperArmUpDir = Vector3.ProjectOnPlane(wolfRightUpperArm.right, flatLeftDir);
    float upperArmUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(leftUpperArmUpDir, rightUpperArmUpDir));

    tailAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfTail.right, flatTargetVelocity));

    // Velocity terms, based on the smoothed velocity from velocityBuffer.
    avgVelocity = velocityBuffer.GetSmoothVal();
    velocityAngle = Vector3.Angle(avgVelocity, aimVelocity);
    velocityAngleCoef = Mathf.InverseLerp(180f, 0f, velocityAngle);
    flatVelocity = avgVelocity;
    flatVelocity.y = 0f;
    flatVelocityManitude = flatVelocity.magnitude;
    // Speed along the aim direction, normalised against 15 * currentSize.
    // NOTE(review): .magnitude discards the sign of the projection, so this term alone
    // does not distinguish forward from backward motion; velocityAngleCoef covers direction.
    velocityCoef = Mathf.InverseLerp(0f, 15f * currentSize, Vector3.Project(avgVelocity, aimVelocity).magnitude);
    flatVelocityAngle = Vector3.Angle(flatVelocity, flatTargetVelocity);

    if (!inferenceMode)
    {
        if (targetDistance > nearModeRange)
        {
            if (Time.fixedTime - landingMoment > landingBufferTime)
            {
                // Seconds elapsed since the landing buffer expired; drives every threshold ramp below.
                float sharpingElapsed = Time.fixedTime - landingMoment - landingBufferTime;

                // Mathf.Lerp clamps its interpolant to [0, 1], so each limit ramps from its
                // first value to its second and then stays there.
                bool outSpeed = flatVelocityManitude < Mathf.Lerp(0f, 7f * currentSize, sharpingElapsed / 4f);
                bool outDirection = flatVelocityAngle > Mathf.Lerp(180f, 10f, sharpingElapsed / 5f);
                float motionLimit = Mathf.Lerp(0.5f, 0.9f, sharpingElapsed / 3f);
                float motionLimit3 = Mathf.Lerp(0.5f, 0.85f, sharpingElapsed / 3f);
                float motionLimit2 = Mathf.Lerp(0.3f, 0.7f, sharpingElapsed / 3f);
                // Reset threshold stays at 0 for the first 2 seconds, then moves toward
                // sharpingResetThreshould over the following 5 seconds.
                float sharpingResetVal = Mathf.Lerp(0f, sharpingResetThreshould, (sharpingElapsed - 2f) / 5f);

                bool outMotion =
                    lookAngle < motionLimit
                    || upAngle < motionLimit
                    || leftThighAngle < motionLimit2
                    || rightThighAngle < motionLimit2
                    || spineLookAngle < motionLimit
                    || rootLookAngle < motionLimit
                    || spineUpAngle < motionLimit3
                    || rootUpAngle < motionLimit3
                    || thighUpAngle < motionLimit2
                    || upperArmUpAngle < motionLimit2
                    || leftUpperArmAngle < motionLimit2
                    || rightUpperArmAngle < motionLimit2;

                if (outSpeed || outDirection || outMotion)
                {
                    // AddReward(-1f);
                    if (outSpeed)
                    {
                        #if UNITY_EDITOR
                        Debug.Log("outSpeed");
                        #endif
                        clampReward += -0.05f;
                        judge.outSpeed++;
                    }
                    if (outDirection)
                    {
                        #if UNITY_EDITOR
                        Debug.Log("outDirection");
                        #endif
                        clampReward += -0.05f;
                        judge.outDirection++;
                    }
                    if (outMotion)
                    {
                        #if UNITY_EDITOR
                        Debug.Log("outMotion");
                        #endif
                        clampReward += -0.05f;
                        judge.outMotion++;
                    }
                    // Record a failed step (-1) instead of ending the episode immediately.
                    sharpingBuffer.PushVal(-1f);
                    // judge.Reset();
                    // return;
                }
                else
                {
                    sharpingBuffer.PushVal(0f);
                }

                #if UNITY_EDITOR
                sharpingVal = sharpingBuffer.GetSmoothVal();
                #endif

                // End the episode once the smoothed failure value falls below the current threshold.
                // NOTE(review): presumably GetSmoothVal() is an average over the buffer - confirm.
                if (sharpingBuffer.GetSmoothVal() < sharpingResetVal)
                {
                    AddReward(-1f);
                    judge.Reset();
                    return;
                }
            }

            // Weighted sum of the alignment scores plus a bonus for low exertion.
            lastReward = (velocityAngleCoef + velocityCoef) * 0.02f
                + (lookAngle + upAngle) * 0.01f
                + (leftThighAngle + rightThighAngle + leftUpperArmAngle + rightUpperArmAngle) * 0.0025f
                + (spineLookAngle + rootLookAngle + spineUpAngle + rootUpAngle) * 0.005f
                + (tailAngle) * 0.005f
                + (thighUpAngle + upperArmUpAngle) * 0.005f
                + (1f - exertionRatio) * 0.005f;

            if (useClampReward)
            {
                // Apply accumulated penalties, but never let the step reward drop below -0.05.
                lastReward = lastReward + clampReward;
                if (lastReward < -0.05f)
                {
                    lastReward = -0.05f;
                }
            }
            totalReward += lastReward;
            AddReward(lastReward);
        }
        // else if(targetDistance > 1.5f)
        else
        {
            // Target is within nearModeRange: count the episode as survived and reset.
            // AddReward(1f);
            judge.survived++;
            judge.Reset();
            return;
        }
    }
}

//大致來說，

--1.獎勵視線，並使用Force Sharping

--2.獎勵投影至"跑動推薦向量"的速度和角度，並使用Force Sharping

--3.獎勵四個大腿的Side Look，並使用Force Sharping

--4.獎勵尾巴符合指定角度

--5.獎勵減少動作變化

--6.獎勵雙手和雙足要同步奔跑

--7.Motion相關的Force Sharping非從0開始

--8.引導身體要盡量平行地面

--9.速度要求正比尺寸

--10.提高ForceSharping的要求，尤其進入階段

4.Force Sharping改為有容錯空間，但是容許值逆向Sharping

允許角色在5秒內發生總計2秒以內的失誤，希望藉此讓角色就算輕微失衡也能嘗試自行修正

但是容許值是逆向Sharping，會在開始Force Sharping後兩秒才逐步放寬標準

實驗時間:

Step: 5e7

Time Elapsed: 159896s (44.42hr)

實驗結果:

實驗結果為失敗

雖然狼比較不會翹起來

但以結果來說並沒有長程奔跑的能力

另外幾乎還是只使用三隻腳

按Log數據來說，狼幾乎都是被Force Sharping淘汰，才沒有演化出長程奔跑的能力

並且OutMotion和OutSpeed的比例約為2:1

而後期Reward雖然嚴格來說是持續成長，但近乎水平收斂，因此應該認為狼沒有被引導到長程奔跑的潛力狀態

而關於只會使用三隻腳

觀察起來，起步階段還是可能四腳並用，是已經跑起來之後才不會使用左前腳

這裡認為原因一樣是 "因為對狼來說不需要"

因為目前也沒有體力制，加上他僅用三隻腳就能達成完美平衡，所以對他來說使用左前腳是弊大於利的行為，目前處理的靈感有三

1.強制四腳輪流觸地

四隻腳必須輪流觸地否則直接淘汰

2.創造需求

原本提高速度標準就是希望狼要借助四腳加速才夠，但看來四腳動物演化出的四足奔跑是全身的高度協同動作，僅靠隨機探索有點難誘發那種協調性

因此另一個方向就是調整肌力，例如不靠四足很難自體平衡

但目前來看很困難，因為狼可以靠微調腳的跨步來影響平衡，而且狼目前也沒有疲勞問題，所以不用分散出力到不同肢體

某種程度可以說，只要紅蓮人偶和現實動物的本質不同，就很難自然達成相同的演化

3.全腿誘導

原本誘導前後腿要各自同步，是誘導大腿角度差不能太大，可以進一步連小腿角度差都不能太大，如此狼就不能把小腿蜷曲著

因此下個實驗為狼追逐

1.提高ForceSharping處罰，允許負值

原本使用ClampReward是因為擔心可能會引導成快速自殺

但考慮ForceSharping有緩衝時間，因此狼會先進入得分狀態，其後才會開始遭遇處罰，因此很有可能不會發展自殺傾向

2.略為放寬OutMotion和OutSpeed的標準

3.四腳需輪流著地

實驗強制四腳輪流觸地，因為沒有這種引導經驗，值得實驗

#自律感知演化物理性角色 #紅蓮人偶 #狼

1

留言

狼 追逐 v3

創作回應

作者相關創作

帶領大家向前行待邁進 💨

相關創作

更多創作

狼追逐 v3