主題

狗頭人追逐 1

夏洛爾 | 2022-11-29 16:17:45 | 巴幣 2 | 人氣 182

Kobold Sentinel Run V1

實驗目標:

1.進入靜立狀態後，進入追逐狀態，在追逐狀態下，要能持續跑至接近目標的距離內

2.動作引導為雙臂展開、身體前傾的帥氣奔跑動作

實驗設計:

1.任何弱點觸地皆失敗 (尾巴和劍並非弱點)

2.使用ClampReward

if(koboldBodies[i].damageCoef > 0f){clampReward += -0.1f * koboldBodies[i].damageCoef;}

3.

//Set: judge.endEpisode = false

//Set: nearModeRange = 1f

//Set: weapon and tail are not weaknesses. If they were, Stand would fall back to GetUp

if(weaknessOnGround)

{

if(inferenceMode)

{

brainMode = BrainMode.GetUp;

SetModel("KoboldGetUp", getUpBrain);

behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;

}

else

{

AddReward(-1f);

judge.outLife++;

judge.Reset();

return;

}

else if(koboldRoot.localPosition.y < -10f)

{

if(inferenceMode)

{

brainMode = BrainMode.GetUp;

SetModel("KoboldGetUp", getUpBrain);

behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;

}

else

{

AddReward(-1f);

judge.outY++;

judge.Reset();

return;

}

else

{

targetSmoothPosition = targetPositionBuffer.GetSmoothVal();

headDir = targetSmoothPosition - stageBase.InverseTransformPoint(koboldHeadRb.position);

rootDir = targetSmoothPosition - stageBase.InverseTransformPoint(koboldRootRb.position);

flatTargetVelocity = rootDir;

flatTargetVelocity.y = 0f;

targetDistance = flatTargetVelocity.magnitude;

lookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldHead.up, headDir));

upAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldHead.right * -1f, Vector3.up));

aimVelocity = flatTargetVelocity.normalized;

aimVelocity.y = 0.2f;

//Lean

Vector3 leanDir = rootAimRot * flatTargetVelocity;

spineUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldSpine.right * -1f, leanDir));

rootUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldRoot.up, leanDir));

//Naruto Arm

Vector3 flatLeftDir = Vector3.Cross(flatTargetVelocity, Vector3.up);

leftUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldLeftUpperArm.right, leftUpperArmAimRot * flatTargetVelocity));

leftForeArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldLeftForeArm.right, leftForeArmAimRot * flatTargetVelocity));

rightUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldRightUpperArm.right, rightUpperArmAimRot * flatTargetVelocity));

rightForeArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldRightForeArm.right, rightForeArmAimRot * flatTargetVelocity));

weaponAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldWeapon.up, weaponAimRot * flatTargetVelocity));

tailRootAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldTailRoot.right *-1, flatTargetVelocity));

leftThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldLeftThigh.forward * -1f, flatLeftDir));

rightThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldRightThigh.forward * -1f, flatLeftDir));

avgVelocity = velocityBuffer.GetSmoothVal();

velocityAngle = Vector3.Angle(avgVelocity, aimVelocity);

velocityAngleCoef = Mathf.InverseLerp(180f, 0f, velocityAngle);

flatVelocity = avgVelocity;

flatVelocity.y = 0f;

flatVelocityManitude = flatVelocity.magnitude;

velocityCoef = Mathf.InverseLerp(0f, 8f, Vector3.Project(avgVelocity, aimVelocity).magnitude );

flatVelocityAngle = Vector3.Angle(flatVelocity, flatTargetVelocity);

if(!inferenceMode)

{

if(targetDistance > nearModeRange)

{

if(Time.fixedTime - landingMoment > landingBufferTime)

{

bool outSpeed = flatVelocityManitude < Mathf.Lerp(0f, 7f, (Time.fixedTime - landingMoment - landingBufferTime)/4f);

bool outDirection = flatVelocityAngle > Mathf.Lerp(180f, 10f, (Time.fixedTime - landingMoment - landingBufferTime)/4f);

float motionLimit = Mathf.Lerp(0f, 0.5f, (Time.fixedTime - landingMoment - landingBufferTime)/4f);

float motionLimit2 = Mathf.Lerp(0f, 0.7f, (Time.fixedTime - landingMoment - landingBufferTime)/4f);

bool outMotion = lookAngle < motionLimit2

|| upAngle < motionLimit2

|| leftThighAngle < motionLimit2

|| rightThighAngle < motionLimit2

|| spineUpAngle < motionLimit

|| rootUpAngle < motionLimit

|| leftUpperArmAngle < motionLimit

|| leftForeArmAngle < motionLimit

|| rightUpperArmAngle < motionLimit

|| rightForeArmAngle < motionLimit

|| weaponAngle < motionLimit;

if( outSpeed || outDirection || outMotion)

{

AddReward(-1f);

if(outSpeed)

{

judge.outSpeed++;

}

if(outDirection)

{

judge.outDirection++;

}

if(outMotion)

{

judge.outMotion++;

}

judge.Reset();

return;

}

lastReward = (velocityAngleCoef + velocityCoef) * 0.02f

+ (lookAngle+upAngle) * 0.0125f + (leftThighAngle+rightThighAngle) * 0.0075f

+ (spineUpAngle+rootUpAngle) * 0.005f

+ (leftUpperArmAngle+leftForeArmAngle+rightUpperArmAngle+rightForeArmAngle+weaponAngle+tailRootAngle ) * 0.001f

+ (1f - exertionRatio) * 0.004f;

totalReward += lastReward;

AddReward( lastReward );

}

// else if(targetDistance > 1.5f)

else

{

// AddReward(1f);

judge.survived++;

judge.Reset();

return;

}

//大致來說，

--1.獎勵視線，並使用Force Sharping

--2.獎勵投影至"跑動推薦向量"的速度和角度，並使用Force Sharping

--3.獎勵Root、Spine、雙臂、武器特定向量(forward/up/right)符合指定角度，並使用Force Sharping

--4.獎勵尾巴根部符合指定角度，但"並不使用Force Sharping"

--5.獎勵減少動作變化

實驗時間:

Step: 5e7

Time Elapsed: 85149s (23.65hr)

實驗結果:

實驗結果為部分成功，部分失敗

狗頭人哨兵的確可以奔跑，但都無法持續奔跑太久，跑一跑會因為不明原因跌倒

目前看不出來理由，不像是被武器或肢體絆倒

因此猜測是Force Sharping造成的最後關頭孤注一擲，或是由於有縮放導致訓練數不足

另外姿勢也很微妙，有一種躡手躡腳的感覺，外加尾巴幾乎常態觸地；這裡發現除了平穩度的問題之外，尾巴的引導向量也有錯誤

因此下個實驗預計

1.修正尾巴獎勵向量，並將尾巴三段都加入獎勵項目

2.取消武器方向的Force Sharping

3.使用ClampReward (尾巴和武器)

4.放大出生範圍以便增加長距離跑動的機率。雖然會增加飛出場外的機率，但由於本次乃訓練Run，飛出場外期間屬於GetUp，因此不會消耗Run的訓練步數，也不會導致訓練中雜訊太多

雖然可能因此讓訓練效率變差

而暫時預計如果下個實驗結果類似，會嘗試將訓練步數倍增

#UnityML #自律感知演化物理性角色 #紅蓮玩偶 #狗頭人哨兵

1

留言

共0則留言

夏洛爾 sherlore

追蹤創作集

作者相關創作

作品資料夾

狗頭人 追逐 1

作者相關創作

更多創作

狗頭人追逐 1