
Wolf Stand Still V3

夏洛爾 | 2022-12-06 22:35:07


Wolf Stand V3

Experiment goals:
1. After the wolf first reaches a standing pose it may still be unstable, so it must then settle into a stand-still state.
2. At the moment it stands up, its facing may not be aimed at the target, so it must turn toward the target.
3. Use Clamp Reward to keep the agent from quickly killing itself (see the sketch below).
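
For context on goal 3: with steady per-step penalties, the quickest way for the agent to stop losing reward is to end the episode, hence "quick suicide". Clamp Reward counters this by letting accumulated penalties only cancel positive reward, never drive a step negative. A minimal sketch of the mechanic (the real version appears in the code block under the experiment design):

    // clampReward accumulates soft penalties (e.g. damaged limbs touching ground).
    // Clamping the per-step total at zero removes the incentive to end the
    // episode early just to escape a stream of negative rewards.
    float stepReward = lastReward + clampReward;
    if (stepReward < 0f) stepReward = 0f; // never issue a net-negative step
    AddReward(stepReward);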

Experiment design:
1. Any weak point touching the ground fails the episode (the tail also counts as a weak point).
2. Non-weak-point limbs (though the wolf currently has no non-weak-point limbs):

    // Each damaged body part feeds a soft penalty into clampReward
    // instead of ending the episode outright.
    if (wolfBodies[i].damageCoef > 0f)
    {
        clampReward += -0.01f * wolfBodies[i].damageCoef;
    }

3.
    //Set: judge.endEpisode = true
    //Set: judge.episodeLength = 10f
    //Set: useClampReward = true
    if(weaknessOnGround)
    {
        LogWeaknessOnGround();
        if(inferenceMode)
        {
            brainMode = BrainMode.GetUp;
            SetModel("WolfGetUp", getUpBrain);
            behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
        }
        else
        {
            //===Train Stand===
            AddReward(-1f);
            judge.outLife++;
            judge.Reset();
            return;
            //===Train Other===
            // brainMode = BrainMode.GetUp;
            // SetModel("WolfGetUp", getUpBrain);
            // behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
        }
    }
    else if(wolfRoot.localPosition.y < -10f)
    {
        if(inferenceMode)
        {
            brainMode = BrainMode.GetUp;
            SetModel("WolfGetUp", getUpBrain);
            behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
        }
        else
        {
            //===Train Stand===
            AddReward(-1f);
            judge.outY++;
            judge.Reset();
            return;
            //===Train Other===
            // brainMode = BrainMode.GetUp;
            // SetModel("WolfGetUp", getUpBrain);
            // behaviorParameters.BehaviorType = BehaviorType.InferenceOnly;
        }
    }
    else if(targetDistance > 500f)
    {
        judge.Reset();
    }
    else
    {
        targetSmoothPosition = targetPositionBuffer.GetSmoothVal();
        headDir = targetSmoothPosition - stageBase.InverseTransformPoint(wolfHeadRb.position);
        rootDir = targetSmoothPosition - stageBase.InverseTransformPoint(wolfRootRb.position);
        flatTargetVelocity = rootDir;
        flatTargetVelocity.y = 0f;
        targetDistance = flatTargetVelocity.magnitude;

        Vector3 forwardDir = flatTargetVelocity.normalized;
        Vector3 flatLeftDir = Vector3.Cross(flatTargetVelocity, Vector3.up);

        lookAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfHead.right * -1f, headDir));
        //SideUp
        upAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfHead.forward, flatLeftDir));
        spineLookAngle = Mathf.InverseLerp(180f, 30f, Vector3.Angle(wolfSpine.right * -1f, forwardDir));
        //SideUp
        spineUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfSpine.forward, flatLeftDir));
        rootLookAngle = Mathf.InverseLerp(180f, 30f, Vector3.Angle(wolfRoot.forward, forwardDir));
        //SideUp
        rootUpAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRoot.right * -1f, flatLeftDir));

        float velocityReward = GetVelocityReward(8f);
        float angularReward = GetAngularVelocityReward(10f);
        float standReward = (wolfLeftFeetBody.isStand ? 0.25f : 0f)
            + (wolfRightFeetBody.isStand ? 0.25f : 0f)
            + (wolfLeftHandBody.isStand ? 0.25f : 0f)
            + (wolfRightHandBody.isStand ? 0.25f : 0f);

        leftThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfLeftThigh.right, Vector3.up));
        leftCalfAngle = Mathf.InverseLerp(180f, 10f, Vector3.Angle(wolfLeftCalf.right, Vector3.up));
        rightThighAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRightThigh.right, Vector3.up));
        rightCalfAngle = Mathf.InverseLerp(180f, 10f, Vector3.Angle(wolfRightCalf.right, Vector3.up));
        leftUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfLeftUpperArm.right, Vector3.up));
        leftForeArmAngle = Mathf.InverseLerp(180f, 10f, Vector3.Angle(wolfLeftForeArm.right, Vector3.up));
        rightUpperArmAngle = Mathf.InverseLerp(180f, 0f, Vector3.Angle(wolfRightUpperArm.right, Vector3.up));
        rightForeArmAngle = Mathf.InverseLerp(180f, 10f, Vector3.Angle(wolfRightForeArm.right, Vector3.up));

        // ===Train Stand===
        if(Time.fixedTime - landingMoment > landingBufferTime)
        {
            bool outVelocity = velocityReward > Mathf.Lerp(1f, 0.3f, (Time.fixedTime - landingMoment - landingBufferTime) / 4f);
            bool outAngularVelocity = angularReward > Mathf.Lerp(1f, 0.5f, (Time.fixedTime - landingMoment - landingBufferTime) / 3f);
            bool outSpeed = outVelocity || outAngularVelocity;
            float aimLimit = Mathf.Lerp(0f, 0.75f, (Time.fixedTime - landingMoment - landingBufferTime) / 3f);
            float aimLimit2 = Mathf.Lerp(0f, 0.85f, (Time.fixedTime - landingMoment - landingBufferTime) / 4f);
            bool outDirection = lookAngle < aimLimit2 || upAngle < aimLimit2;
            bool outMotion = spineUpAngle < aimLimit || rootUpAngle < aimLimit || spineLookAngle < aimLimit2 || rootLookAngle < aimLimit2;
            if(outSpeed || outDirection || outMotion)
            {
                AddReward(-1f);
                if(outSpeed)
                {
                    Debug.Log("outSpeed");
                    Debug.Log("outVelocity: " + outVelocity);
                    Debug.Log("outAngularVelocity: " + outAngularVelocity);
                    judge.outSpeed++;
                }
                if(outDirection)
                {
                    Debug.Log("outDirection");
                    Debug.Log("lookAngle: " + lookAngle);
                    Debug.Log("upAngle: " + upAngle);
                    Debug.Log("spineLookAngle: " + spineLookAngle);
                    Debug.Log("rootLookAngle: " + rootLookAngle);
                    judge.outDirection++;
                }
                if(outMotion)
                {
                    Debug.Log("outMotion");
                    Debug.Log("spineUpAngle: " + spineUpAngle);
                    Debug.Log("rootUpAngle: " + rootUpAngle);
                    judge.outMotion++;
                }
                judge.Reset();
                return;
            }
        }
        //===Train Stand===

        lastReward = (1f - velocityReward) * 0.01f
            + (1f - angularReward) * 0.01f
            + (lookAngle + upAngle + spineLookAngle + spineUpAngle + rootLookAngle + rootUpAngle) * 0.005f
            + standReward * 0.01f
            + (leftThighAngle + leftCalfAngle + rightThighAngle + rightCalfAngle + leftUpperArmAngle + leftForeArmAngle + rightUpperArmAngle + rightForeArmAngle) * 0.003f
            + (1f - exertionRatio) * 0.006f;

        if(lookAngle > 0.9f && upAngle > 0.9f && spineLookAngle > 0.9f && rootLookAngle > 0.9f
            && velocityReward < 0.3f && angularReward < 0.5f && standReward > 0.9f
            && leftThighAngle > 0.8f && rightThighAngle > 0.8f && leftUpperArmAngle > 0.8f && rightUpperArmAngle > 0.8f)
        {
            //===Train Stand===
            Debug.Log("Stand");
            totalReward += 0.01f;
            AddReward(0.01f);
            //===Enter Run===
            // ConfirmArrived();
            // brainMode = BrainMode.Run;
            // SetModel("WolfTrain", runBrain);
            // behaviorParameters.BehaviorType = BehaviorType.Default;
            // landingMoment = Time.fixedTime;
        }
        //===Train Stand===

        if(useClampReward)
        {
            lastReward = lastReward + clampReward;
            if(lastReward < 0f) lastReward = 0f;
        }
        totalReward += lastReward;
        AddReward(lastReward);
    }
Roughly speaking, the reward:
1. Encourages facing the target, with ForceSharping (distilled in the sketch after this list).
2. Encourages suppressing velocity and angular velocity, also with ForceSharping.
3. Encourages keeping the feet planted.
4. Encourages suppressing exertion.
5. Uses ClampReward.
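
As I read the block above, "ForceSharping" is a failure threshold that starts loose at the moment of landing and tightens over a few seconds, so early wobble is tolerated but lingering motion is punished. A distilled sketch of the pattern (elapsed and bound are my names, not the project's):

    // ForceSharping (distilled): the acceptable bound tightens with time
    // since landing; falling outside the current bound fails the episode.
    float elapsed = Time.fixedTime - landingMoment - landingBufferTime;
    float bound = Mathf.Lerp(1f, 0.3f, elapsed / 4f); // loose -> strict over ~4s
    if (velocityReward > bound)
    {
        AddReward(-1f);
        judge.Reset();
        return;
    }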

4. Use initialize_from to warm-start from a slightly failed model (after Wolf Stand Still V2 there was another run whose guidance vector was configured incorrectly); an example invocation follows below.
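
For reference, warm-starting a run in ML-Agents is done by pointing the trainer at a previous run ID; a typical invocation looks roughly like this (the config path and run IDs are my guesses, not the author's actual ones):

    mlagents-learn config/wolf.yaml --run-id=WolfStandV3 --initialize-from=WolfStandV2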

Experiment time:
Step: 5e7
Time Elapsed: 101315s (28.14hr)

Experiment results:
The experiment succeeded, but the result is not ideal.

The wolf does adjust its facing, but there are two problems.
First, it is somewhat slow.
Second, it is imprecise: it either overshoots and then corrects back, or settles at a less-than-ideal angle.

First, warm-starting from the failed model probably explains the wolf's bad habit of making huge facing adjustments, as well as its tendency to stop side-on.
Also, while the guidance vector was misconfigured, I at one point assumed the requirements were too strict and loosened the Force Sharping values;
they should perhaps be tightened back to the stricter range.

Finally, Force Sharping may also need a tolerance margin. The wolf currently triggers Out of Speed from time to time, which may come down to poorly tuned Force Sharping values; it also leaves the wolf afraid to adjust its facing at all, since any adjustment risks an overspeed judgment, so it simply freezes.
Overspeed shows up as both Out of Velocity and Out of AngularVelocity. Out of Velocity must account for scale: the wolf's size varies from 1x to 5x, so applying the same Out of Velocity requirement at every size is unreasonable. A size-1 wolf moving at speed 2 should count the same as a size-5 wolf moving at speed 10; as it stands, the larger the wolf, the more afraid it becomes to move. One scale-aware adjustment is sketched below.
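
One plausible way to make the check scale-aware, assuming a wolfScale field in the 1–5 range and that GetVelocityReward normalizes against the reference speed it is given (my reading of the code, not confirmed):

    // Scale the reference speed by body size so a 5x wolf may move 5x as
    // fast as a 1x wolf before the same check judges it out of speed.
    float velocityReward = GetVelocityReward(8f * wolfScale); // wolfScale: assumed field, 1f..5f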

Out of AngularVelocity is not subject to that scale issue, but it should still allow a tolerance margin to avoid excessive punishment.

Finally, the rear calves touch the ground very easily, and the immediate failure that follows may also be making the wolf conservative; the rear calves and the tail should be moved to ClampReward instead (sketched below).
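
That change could reuse the damageCoef pattern from the design section; roughly (the isTouchingGround flag and the tail/calf body references are my stand-ins for whatever the project actually exposes):

    // Tail / rear-calf contact becomes a soft, clamped penalty rather
    // than an episode-ending weakness check.
    if (wolfTailBody.isTouchingGround)      clampReward += -0.01f;
    if (wolfLeftCalfBody.isTouchingGround)  clampReward += -0.01f;
    if (wolfRightCalfBody.isTouchingGround) clampReward += -0.01f;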

So, for the next experiment:
1. Restore Force Sharping to the strict level (exactly matching the bonus criteria for Stand).
2. Adjust Out of Velocity according to size.
3. Give Force Sharping a tolerance margin (see the sketch after this list).
4. Move the tail and the front and rear calves to ClampReward.
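
For point 3, one simple tolerance scheme is to require the violation to persist for several physics steps before failing, instead of failing on the first out-of-bounds frame (outSpeedFrames and maxOutFrames are hypothetical fields, not from the project):

    // Only fail after overspeed has held for several consecutive
    // FixedUpdate steps, so brief facing corrections are forgiven.
    if (outSpeed) outSpeedFrames++;
    else outSpeedFrames = 0;
    if (outSpeedFrames > maxOutFrames)
    {
        AddReward(-1f);
        judge.outSpeed++;
        judge.Reset();
        return;
    }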
