Wolf Stand Still V4

夏洛爾 | 2022-12-08 11:13:27


Wolf Stand V4

Experiment goals:
1. After the moment of standing up, the wolf may in fact still be unstable, so it must then enter a stand-still state.
2. After the moment of standing up, its facing may not be aimed at the target, so it must turn toward the target.
3. Use Clamp Reward to prevent quick suicide.

Experiment design:
1. Touching the ground with any weak point is a failure (the tail and the four lower legs are not weak points).
2. Non-weak-point limbs are penalized in proportion to the damage they take:

if(wolfBodies[i].damageCoef > 0f)
{
    clampReward += -0.01f * wolfBodies[i].damageCoef;
}
3. Termination and reward logic:
//Set: judge.endEpisode = true
//Set: judge.episodeLength = 10f
//Set: useClampReward = true
if(weaknessOnGround)
{
    LogWeaknessOnGround();
    if(inferenceMode)
    {
        brainMode = BrainMode.GetUp;
        SetModel("WolfGetUp", getUpBrain);
        behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
    else
    {
        //===Train Stand===
        AddReward(-1f);
        judge.outLife++;
        judge.Reset();
        return;
        //===Train Other===
        // brainMode = BrainMode.GetUp;
        // SetModel("WolfGetUp", getUpBrain);
        // behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
}
else if(wolfRoot.localPosition.y < -10f)
{
    if(inferenceMode)
    {
        brainMode = BrainMode.GetUp;
        SetModel("WolfGetUp", getUpBrain);
        behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
    else
    {
        //===Train Stand===
        AddReward(-1f);
        judge.outY++;
        judge.Reset();
        return;
        //===Train Other===
        // brainMode = BrainMode.GetUp;
        // SetModel("WolfGetUp", getUpBrain);
        // behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
    }
}
else if(targetDistance > 500f)
{
    judge.Reset();
}
else
{
    targetSmoothPosition = targetPositionBuffer.GetSmoothVal();
    headDir = targetSmoothPosition - stageBase.InverseTransformPoint(wolfHeadRb.position);
    rootDir = targetSmoothPosition - stageBase.InverseTransformPoint(wolfRootRb.position);
    flatTargetVelocity = rootDir;
    flatTargetVelocity.y = 0f;
    targetDistance = flatTargetVelocity.magnitude;

    Vector3 forwardDir = flatTargetVelocity.normalized;
    Vector3 flatLeftDir = Vector3.Cross(flatTargetVelocity, Vector3.up);
    lookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfHead.right * -1f, headDir));
    //SideUp
    upAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfHead.forward, flatLeftDir));
    spineLookAngle = Mathf.InverseLerp(180f, 30f, Vector3.Angle(wolfSpine.right * -1f, forwardDir));
    //SideUp
    spineUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfSpine.forward, flatLeftDir));
    rootLookAngle = Mathf.InverseLerp(180f, 30f, Vector3.Angle(wolfRoot.forward, forwardDir));
    //SideUp
    rootUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRoot.right * -1f, flatLeftDir));

    float velocityReward = GetVelocityReward(8f * currentSize);
    float angularReward = GetAngularVelocityReward(10f);
    float standReward = (wolfLeftFeetBody.isStand? 0.25f : 0f)
                      + (wolfRightFeetBody.isStand? 0.25f : 0f)
                      + (wolfLeftHandBody.isStand? 0.25f : 0f)
                      + (wolfRightHandBody.isStand? 0.25f : 0f);

    leftThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfLeftThigh.right, Vector3.up));
    leftCalfAngle = Mathf.InverseLerp(180f, 10f, Vector3.Angle(wolfLeftCalf.right, Vector3.up));
    rightThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRightThigh.right, Vector3.up));
    rightCalfAngle = Mathf.InverseLerp(180f, 10f, Vector3.Angle(wolfRightCalf.right, Vector3.up));
    leftUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfLeftUpperArm.right, Vector3.up));
    leftForeArmAngle = Mathf.InverseLerp(180f, 10f, Vector3.Angle(wolfLeftForeArm.right, Vector3.up));
    rightUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRightUpperArm.right, Vector3.up));
    rightForeArmAngle = Mathf.InverseLerp(180f, 10f, Vector3.Angle(wolfRightForeArm.right, Vector3.up));

    // ===Train Stand===
    if(Time.fixedTime - landingMoment > landingBufferTime)
    {
        bool outVelocity = velocityReward > Mathf.Lerp(1f, 0.3f, (Time.fixedTime - landingMoment - landingBufferTime)/5f);
        bool outAngularVelocity = angularReward > Mathf.Lerp(1f, 0.5f, (Time.fixedTime - landingMoment - landingBufferTime)/5f);
        bool outSpeed = outVelocity || outAngularVelocity;
        float aimLimit = Mathf.Lerp(0f, 0.9f, (Time.fixedTime - landingMoment - landingBufferTime)/3f);
        float aimLimit2 = Mathf.Lerp(0f, 0.9f, (Time.fixedTime - landingMoment - landingBufferTime)/3f);
        bool outDirection = lookAngle < aimLimit2 || upAngle < aimLimit2 || spineLookAngle < aimLimit2 || rootLookAngle < aimLimit2;
        bool outMotion = spineUpAngle < aimLimit || rootUpAngle < aimLimit;
        if(outSpeed || outDirection || outMotion)
        {
            // AddReward(-1f);
            if(outSpeed)
            {
                clampReward += -0.005f;
                Debug.Log("outSpeed");
                Debug.Log("outVelocity: " + outVelocity);
                Debug.Log("outAngularVelocity: " + outAngularVelocity);
                judge.outSpeed++;
            }
            if(outDirection)
            {
                clampReward += -0.005f;
                Debug.Log("outDirection");
                Debug.Log("lookAngle: " + lookAngle);
                Debug.Log("upAngle: " + upAngle);
                Debug.Log("spineLookAngle: " + spineLookAngle);
                Debug.Log("rootLookAngle: " + rootLookAngle);
                judge.outDirection++;
            }
            if(outMotion)
            {
                clampReward += -0.005f;
                Debug.Log("outMotion");
                Debug.Log("spineUpAngle: " + spineUpAngle);
                Debug.Log("rootUpAngle: " + rootUpAngle);
                judge.outMotion++;
            }
            sharpingBuffer.PushVal(-1f);
            // judge.Reset();
            // return;
        }
        else
        {
            sharpingBuffer.PushVal(0f);
        }
        sharpingVal = sharpingBuffer.GetSmoothVal();
        // Debug.Log( sharpingBuffer.GetSmoothVal() );
        if(sharpingBuffer.GetSmoothVal() < sharpingResetThreshould)
        {
            AddReward(-1f);
            judge.Reset();
            return;
        }
    }

    //===Train Stand===
    lastReward = (1f - velocityReward) * 0.01f
               + (1f - angularReward) * 0.01f
               + (lookAngle + upAngle + spineLookAngle + spineUpAngle + rootLookAngle + rootUpAngle) * 0.005f
               + standReward * 0.01f
               + (leftThighAngle + leftCalfAngle + rightThighAngle + rightCalfAngle + leftUpperArmAngle + leftForeArmAngle + rightUpperArmAngle + rightForeArmAngle) * 0.003f
               + (1f - exertionRatio) * 0.006f;

    if(lookAngle > 0.9f && upAngle > 0.9f && spineLookAngle > 0.9f && rootLookAngle > 0.9f
       && velocityReward < 0.3f && angularReward < 0.5f && standReward > 0.9f
       && leftThighAngle > 0.8f && rightThighAngle > 0.8f && leftUpperArmAngle > 0.8f && rightUpperArmAngle > 0.8f)
    {
        //===Train Stand===
        // Debug.Log("Stand");
        lastReward += 0.01f;
        //===Enter Run===
        // ConfirmArrived();
        // brainMode = BrainMode.Run;
        // SetModel("WolfTrain", runBrain);
        // behaviorParameters.BehaviorType = BehaviorType.Default;
        // landingMoment = Time.fixedTime;
    }

    //===Train Stand===
    if(useClampReward)
    {
        lastReward = lastReward + clampReward;
        if(lastReward < 0f) lastReward = 0f;
    }
    totalReward += lastReward;
    AddReward(lastReward);
}

Roughly speaking:
--1. Encourage facing the target, and use Force Sharping
--2. Encourage suppressing velocity and angular velocity, and use Force Sharping
--3. Encourage keeping the feet on the ground
--4. Encourage suppressing exertion
--5. Use ClampReward

4. Force Sharping allows an error-tolerance range:
a total of 0.5 seconds of mistakes is allowed within every 2 seconds, as sketched below.
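The post doesn't show how this tolerance window is implemented, but the idea can be reproduced with a simple moving-average buffer. Everything below (class name, window size, threshold value) is an illustrative assumption, not the project's actual sharpingBuffer:

// Minimal sketch of the Force Sharping tolerance idea, assuming a fixed-size
// moving-average window over FixedUpdate steps. Names and numbers are illustrative.
public class MovingAverageBuffer
{
    private readonly float[] vals;
    private int index;

    public MovingAverageBuffer(int size) { vals = new float[size]; }

    public void PushVal(float v)
    {
        vals[index] = v;
        index = (index + 1) % vals.Length;
    }

    public float GetSmoothVal()
    {
        float sum = 0f;
        foreach (float v in vals) sum += v;
        return sum / vals.Length;
    }
}

// With FixedUpdate at 0.02s, a 2-second window is 100 samples. Pushing -1f for
// 0.5s of mistakes (25 samples) averages to -0.25, so a reset threshold around
// -0.25 reproduces "0.5 seconds of error allowed per 2 seconds".
// var sharpingBuffer = new MovingAverageBuffer(100);
// float sharpingResetThreshould = -0.25f;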

5. VelocityReward is scaled by the wolf's size (sketched below).
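GetVelocityReward itself isn't shown in the post. One plausible shape, assuming it maps the root body's speed into [0, 1] against the ceiling passed in as 8f * currentSize, would be:

// Hypothetical sketch of a size-scaled velocity reward: a larger wolf gets a larger
// maxSpeed ceiling, so it must move proportionally faster before velocityReward
// approaches 1. wolfRootRb is the root Rigidbody referenced elsewhere in the post.
float GetVelocityReward(float maxSpeed)
{
    return Mathf.InverseLerp(0f, maxSpeed, wolfRootRb.velocity.magnitude);
}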

Experiment time:
Step: 5e7
Time Elapsed: 90616s (25.17hr)

Experiment results:
The experiment was a success: the wolf can quickly adjust its facing to aim at the target.

The one shortcoming is that the Survive rate is very low.
I believe this is because, with ClampReward, a mistake merely scores zero instead of being punished.

This mechanism makes the ML very bold about exploring, which is why the wolf keeps shifting around even after it has aimed at the target,
and so it still ends up being eliminated by Force Sharping.

There are two approaches I expect could improve this (see the sketch after this list):
1. Don't clamp ClampReward to 0; clamp it to a small negative value instead. The balance should be tuned so that a mistake feels like a punishment to the ML, but one small enough that it doesn't prefer quick suicide.
2. Reverse-sharpen the magnitude of the immediate negative score as well.
This is a strange idea, since normally I try to avoid giving different scores for the same observation.
But considering that the ML's gait is essentially locked in once it appears, raising the penalty only after the gait has emerged might make the ML merely fine-tune the gait instead of evolving back into a quick-suicide gait.
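A minimal sketch of idea 1, plus the reverse-sharpened floor of idea 2, assuming the same useClampReward branch shown in the code above; the -0.002f floor, gaitSettleMoment, and the 10-second ramp are illustrative placeholders, not tuned values:

// Idea 1: floor the clamped reward at a small negative value instead of exactly 0f,
// so a mistake still reads as a mild punishment.
if(useClampReward)
{
    lastReward = lastReward + clampReward;
    if(lastReward < -0.002f) lastReward = -0.002f;
}

// Idea 2: reverse-sharpen the floor itself, ramping the penalty in only after the
// gait has had time to settle (gaitSettleMoment is a hypothetical timestamp).
// float penaltyScale = Mathf.Lerp(0f, 1f, (Time.fixedTime - gaitSettleMoment) / 10f);
// float penaltyFloor = -0.002f * penaltyScale;
// if(lastReward < penaltyFloor) lastReward = penaltyFloor;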

Let me think about this for a moment.
Force Sharping was never theoretically sound to begin with: it makes the agent easier to punish simply as time passes.
Yet it is clearly quite effective even for an agent with no sense of time, and the key is that it converges the gait.
So in theory, reverse-sharpening the magnitude of the immediate negative score should probably work as well.

But this also suggests a third approach:

3. Before the facing is aligned, the reward for suppressing velocity and angular velocity carries little weight; once the facing is aligned, that weight rises sharply (sketched below).
This approach is consistent with the underlying principle, and it also matches the recommended simple multi-task model,
although I personally have not yet had a successful experiment with that simple multi-task model.
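A sketch of idea 3, reusing the variables from the reward code above; the 0.9f gate and the two weights are illustrative numbers:

// Keep the velocity/angular-velocity terms at a low weight until the facing terms
// are satisfied, then raise their weight sharply.
bool aimed = lookAngle > 0.9f && upAngle > 0.9f && spineLookAngle > 0.9f && rootLookAngle > 0.9f;
float speedWeight = aimed ? 0.02f : 0.002f;
lastReward = (1f - velocityReward) * speedWeight
           + (1f - angularReward) * speedWeight
           + (lookAngle + upAngle + spineLookAngle + spineUpAngle + rootLookAngle + rootUpAngle) * 0.005f;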

In any case, the wolf already meets the stand-still objective well.

So the next experiment will finally be the long-awaited wolf chase:
1. Adjust the speed requirement by size.
Unlike the kobold's design goal, I want the wolf to run faster the bigger it is.
2. Reverse-sharpen the Force Sharping tolerance: only after the wolf starts running is the running standard relaxed (see the sketch after this list).
3. Encourage the four thighs not to splay sideways (encourage the Side Look angle).
4. Encourage keeping the head and torso straight (encourage the Side Look angle).
5. Encourage speed.
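For plan item 2, here is a sketch of what reverse-sharpening the Force Sharping tolerance could look like, assuming the same reset-threshold mechanism as the standing code; hasStartedRunning, runStartMoment, and both threshold values are hypothetical placeholders:

// Widen the tolerance only after the wolf is judged to be running.
float GetSharpingResetThreshold(bool hasStartedRunning, float runStartMoment)
{
    const float strictThreshold = -0.25f; // standing-phase tolerance
    const float looseThreshold  = -0.5f;  // relaxed tolerance once running
    if(!hasStartedRunning) return strictThreshold;
    // Reverse sharping: ramp toward the looser threshold over ~5 seconds of running.
    return Mathf.Lerp(strictThreshold, looseThreshold, (Time.fixedTime - runStartMoment) / 5f);
}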
