前往
大廳
主題

狗頭人 追逐 1

夏洛爾 | 2022-11-29 16:17:45 | 巴幣 2 | 人氣 182


Kobold Sentinel Run V1

實驗目標:
1.進入靜立狀態後,進入追逐狀態,在追逐狀態下,要能持續跑至接近目標的距離內
2.動作引導為雙臂展開身體前傾的帥氣奔跑動作

實驗設計:
1.任何弱點觸地皆失敗 (尾巴和劍並非弱點)
2.使用ClampReward
// ClampReward rule: when this body part's damageCoef is positive, subtract 0.1 * damageCoef from clampReward.
// (Excerpt: the loop over i, koboldBodies and clampReward are declared outside this snippet.)
if(koboldBodies[i].damageCoef > 0f){clampReward += -0.1f * koboldBodies[i].damageCoef;}
3.
//Set: judge.endEpisode = false
//Set: nearModeRange = 1f
//Set: weapon and tail are not weaknesses. If they were, Stand would fall back to GetUp

// Per-step termination checks and reward shaping for the chase (Run) behaviour.
// This is an excerpt from a method body: the enclosing method and every field used here
// (judge, buffers, body-part references, *AimRot values, ...) are declared outside this view.
//
// Failure case 1: a weakness is touching the ground.
if(weaknessOnGround)
{
if(inferenceMode)
{
// Inference: hand control over to the GetUp model instead of ending the episode.
brainMode = BrainMode.GetUp;
SetModel("KoboldGetUp", getUpBrain);
behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}
else
{
// Training: penalise with -1, count the failure in judge.outLife, reset and stop processing this step.
AddReward(-1f);
judge.outLife++;
judge.Reset();
return;
}
}
// Failure case 2: the root's local Y position dropped below -10.
else if(koboldRoot.localPosition.y < -10f)
{
if(inferenceMode)
{
// Inference: same hand-over to the GetUp model as in the weakness-on-ground case.
brainMode = BrainMode.GetUp;
SetModel("KoboldGetUp", getUpBrain);
behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
}
else
{
// Training: penalise with -1, count the failure in judge.outY, reset and stop processing this step.
AddReward(-1f);
judge.outY++;
judge.Reset();
return;
}
}
else
{
// Direction from head and root to the smoothed target position.
// NOTE(review): presumably stageBase is a Transform and these vectors are in its local space - confirm.
targetSmoothPosition = targetPositionBuffer.GetSmoothVal();
headDir = targetSmoothPosition - stageBase.InverseTransformPoint(koboldHeadRb.position);
rootDir = targetSmoothPosition - stageBase.InverseTransformPoint(koboldRootRb.position);

// Horizontal (y = 0) vector from root to target; its length is the distance used for the near-range check.
flatTargetVelocity = rootDir;
flatTargetVelocity.y = 0f;
targetDistance = flatTargetVelocity.magnitude;
// All "*Angle" scores below map an angle of 180..0 degrees onto 0..1, so 1 means perfectly aligned.
lookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldHead.up, headDir));
upAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldHead.right * -1f, Vector3.up));
// Recommended movement vector: normalized flat target direction with y then set to 0.2 (not re-normalized).
aimVelocity = flatTargetVelocity.normalized;
aimVelocity.y = 0.2f;
//Lean
// Lean guide: the flat target direction transformed by rootAimRot; spine and root are scored against it.
// NOTE(review): rootAimRot and the other *AimRot values are presumably Quaternions - confirm.
Vector3 leanDir = rootAimRot * flatTargetVelocity;
spineUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldSpine.right * -1f, leanDir));
rootUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldRoot.up, leanDir));
//Naruto Arm
// Horizontal vector perpendicular to the target direction; in this block it is only used for the thigh scores.
Vector3 flatLeftDir = Vector3.Cross(flatTargetVelocity, Vector3.up);
// Arm and weapon scores: each part's axis is compared with the flat target direction transformed by that part's own aim rotation.
leftUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldLeftUpperArm.right, leftUpperArmAimRot * flatTargetVelocity));
leftForeArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldLeftForeArm.right, leftForeArmAimRot * flatTargetVelocity));
rightUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldRightUpperArm.right, rightUpperArmAimRot * flatTargetVelocity));
rightForeArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldRightForeArm.right, rightForeArmAimRot * flatTargetVelocity));
weaponAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldWeapon.up, weaponAimRot * flatTargetVelocity));
// Tail root is scored against the untransformed flat target direction (no aim rotation applied).
tailRootAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldTailRoot.right *-1, flatTargetVelocity));
leftThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldLeftThigh.forward * -1f, flatLeftDir));
rightThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(koboldRightThigh.forward * -1f, flatLeftDir));
// Smoothed velocity and how well it lines up with the recommended movement vector.
avgVelocity = velocityBuffer.GetSmoothVal();
velocityAngle = Vector3.Angle(avgVelocity, aimVelocity);
velocityAngleCoef = Mathf.InverseLerp(180f, 0f, velocityAngle);
flatVelocity = avgVelocity;
flatVelocity.y = 0f;
flatVelocityManitude = flatVelocity.magnitude;
// Speed score: length of the projection of avgVelocity onto aimVelocity, mapped from 0..8 to 0..1.
// The projection's magnitude is unsigned, so direction is only reflected in velocityAngleCoef.
velocityCoef = Mathf.InverseLerp(0f, 8f, Vector3.Project(avgVelocity, aimVelocity).magnitude );
flatVelocityAngle = Vector3.Angle(flatVelocity, flatTargetVelocity);
// Episode termination and rewards below are applied only when not in inference mode.
if(!inferenceMode)
{
if(targetDistance > nearModeRange)
{
// Once landingBufferTime has elapsed since landingMoment, enforce limits that tighten
// linearly over the following 4 time units of Time.fixedTime (Mathf.Lerp clamps its t to 0..1).
if(Time.fixedTime - landingMoment > landingBufferTime)
{
// Too slow: flat speed below a threshold ramping from 0 to 7.
bool outSpeed = flatVelocityManitude < Mathf.Lerp(0f, 7f, (Time.fixedTime - landingMoment - landingBufferTime)/4f);
// Wrong heading: angle between flat velocity and flat target direction above a limit ramping from 180 down to 10 degrees.
bool outDirection = flatVelocityAngle > Mathf.Lerp(180f, 10f, (Time.fixedTime - landingMoment - landingBufferTime)/4f);
// Minimum pose scores: motionLimit ramps to 0.5 (spine, root, arms, weapon), motionLimit2 ramps to 0.7 (head and thighs).
float motionLimit = Mathf.Lerp(0f, 0.5f, (Time.fixedTime - landingMoment - landingBufferTime)/4f);
float motionLimit2 = Mathf.Lerp(0f, 0.7f, (Time.fixedTime - landingMoment - landingBufferTime)/4f);
// tailRootAngle is not part of this check; it only contributes to the reward below.
bool outMotion = lookAngle < motionLimit2
|| upAngle < motionLimit2
|| leftThighAngle < motionLimit2
|| rightThighAngle < motionLimit2
|| spineUpAngle < motionLimit
|| rootUpAngle < motionLimit
|| leftUpperArmAngle < motionLimit
|| leftForeArmAngle < motionLimit
|| rightUpperArmAngle < motionLimit
|| rightForeArmAngle < motionLimit
|| weaponAngle < motionLimit;
// Any violation ends the attempt: a single -1 penalty, one counter per violated criterion, then reset.
if( outSpeed || outDirection || outMotion)
{
AddReward(-1f);
if(outSpeed)
{
judge.outSpeed++;
}
if(outDirection)
{
judge.outDirection++;
}
if(outMotion)
{
judge.outMotion++;
}
judge.Reset();
return;
}
}
// Per-step reward: weighted sum of velocity, head, thigh, spine/root and arm/weapon/tail scores,
// plus a small term that grows as exertionRatio shrinks.
lastReward = (velocityAngleCoef + velocityCoef) * 0.02f
+ (lookAngle+upAngle) * 0.0125f + (leftThighAngle+rightThighAngle) * 0.0075f
+ (spineUpAngle+rootUpAngle) * 0.005f
+ (leftUpperArmAngle+leftForeArmAngle+rightUpperArmAngle+rightForeArmAngle+weaponAngle+tailRootAngle ) * 0.001f
+ (1f - exertionRatio) * 0.004f;

totalReward += lastReward;
AddReward( lastReward );
}
// else if(targetDistance > 1.5f)
// Target is within nearModeRange: count the run as survived and reset (the +1 bonus is currently disabled).
else
{
// AddReward(1f);
judge.survived++;
judge.Reset();
return;
}
}
}

//大致來說,
--1.獎勵視線,並使用Force Sharping
--2.獎勵投影至"跑動推薦向量"的速度和角度,並使用Force Sharping
--3.獎勵Root、Spine、雙臂、武器特定向量(forward/up/right)符合指定角度,並使用Force Sharping
--4.獎勵尾巴根部符合指定角度,但"並不使用Force Sharping"
--5.獎勵減少動作變化

實驗時間:
Step: 5e7
Time Elapsed: 85149s (23.65hr)

實驗結果:
實驗結果為部分成功,部分失敗

狗頭人哨兵的確可以奔跑,但都無法持續奔跑太久,跑一跑會因為不明原因跌倒
目前看不出來理由,不像是被武器或肢體絆倒
因此猜測是ForceSharping造成的最後關頭孤注一擲或是由於有縮放導致訓練數不足

另外姿勢也微妙,有一種躡手躡腳的感覺,外加尾巴幾乎常態觸地,這裡發現除了平穩,尾巴的引導向量也有錯誤

因此下個實驗預計
1.修正尾巴獎勵向量,並將尾巴三段都加入獎勵項目
2.取消武器方向的Force Sharping
3.使用ClampReward (尾巴和武器)
4.放大出生範圍以便增加長距離跑動的機率。雖然這會增加飛出場外的機率,但由於本次訓練的是Run,飛出場外期間屬於GetUp,不會消耗Run的訓練步數,也不至於導致訓練中雜訊太多,
雖然可能因此讓訓練效率變差

而暫時預計如果下個實驗結果類似,會嘗試將訓練步數倍增

0則留言

更多創作