open-goal-jak-project/goal_src/jak2/engine/gfx/foreground/bones.gc

515 lines
18 KiB
Common Lisp

;;-*-Lisp-*-
(in-package goal)
;; name: bones.gc
;; name in dgo: bones
;; dgos: ENGINE, GAME
#|@file
bones, joints, cspace, matrix, joint-anim-frames...
The "joints" define the structure of the skeleton, just storing a bind pose, and parent info. These don't change,
and are loaded as part of the skeleton. The actual joint objects are stored in the art-joint-geo.
The "do-joint-math" method of process-drawable is responsible for computing the joint-anim-frame and the bones.
The "cspace" system is used to compute bones from joint-anim-frames.
The "joint-anim-frame" is a list of transformqs for the relative transform between bones.
The "bones" are the world space transform of each 'bone'
The "do-joint-math" function does:
- call the generate-frame-function callback to generate a joint-anim-frame (does the joint anim decompress/blending)
- call the prebind-function which can modify the relative transforms (eg: joint-mods)
- walk the tree of joints, computing bones using the cspace
- call the postbind-function, for other effects.
The "bones" system creates matrices for foreground renderers from bones/joints.
The inputs are bones/joints, and the output is a pris-mtx, which holds a 4x4 "t-mtx" for transforming and a 3x3 "n-mtx"
(stored in 3 vec4's) for rotating normals.
all matrices are calculated at once, and shared between merc/emerc/gmerc/shadow
this is done by a linked list of "bone calculations", which is stashed in the dma buffer.
|#
;; todo check bad reference.
;; DECOMP BEGINS
;; list node for pending bone calculations. These are terminated by 0's.
;; Bookkeeping record for the chain of pending bone calculations.
;; bone-list-init stores 0 in both fields to mark the list as empty.
(deftype bone-calculation-list (structure)
;; first: entry where bones-mtx-calc-execute starts walking the chain.
;; next: NOTE(review): presumably the most recently queued entry (the append point) - confirm
;; against the code that enqueues calculations, which is not in this file section.
((first bone-calculation)
(next bone-calculation)
)
)
;; head of bone calculation list
;; Single shared instance, allocated with (new 'global ...) when this file is loaded.
(define *bone-calculation-list* (new 'global 'bone-calculation-list))
(defun bone-list-init ()
  "Empty the global bone calculation list by zeroing both of its links."
  ;; A 0 pointer is the list terminator, so writing 0 to both fields leaves nothing queued.
  (set! (-> *bone-calculation-list* first) (the-as bone-calculation 0))
  (set! (-> *bone-calculation-list* next) (the-as bone-calculation 0))
  (none)
  )
;; clear list.
;; Runs once at load time so the freshly allocated list starts out empty.
(bone-list-init)
;; VU0 microprogram for bone calculations.
;; Declared here as a static vu-function with no contents given in this file;
;; bones-init hands it to dma-buffer-add-vu-function when PC_PORT is not set.
(define bones-vu0-block (new 'static 'vu-function))
(defun bones-set-sqwc ()
  "Program the DMA sqwc register (sqwc = 1, tqwc = 4) so that a joint transfer skips the
   metadata and only moves the bind-pose. Undo with bones-reset-sqwc."
  (let ((dma-ctrl (the-as dma-bank-control #x1000e000)))
    (set! (-> dma-ctrl sqwc) (new 'static 'dma-sqwc :sqwc #x1 :tqwc #x4))
    )
  (none)
  )
(defun bones-reset-sqwc ()
  "Put the DMA sqwc register back to sqwc = 1, tqwc = 1 so DMA behaves normally again.
   Counterpart of bones-set-sqwc."
  (let ((dma-ctrl (the-as dma-bank-control #x1000e000)))
    (set! (-> dma-ctrl sqwc) (new 'static 'dma-sqwc :sqwc #x1 :tqwc #x1))
    )
  (none)
  )
(defun bones-init ((arg0 dma-buffer))
  "Initialize scratchpad/vu0 for BONE MATH.
   Fills in the scratchpad layout table for the two bone-memory buffers and, on non-PC
   builds, uploads bones-vu0-block to VU0. arg0 is not used by this function."
  ;; scratchpad layout of double-buffered bone calculation.
  (let ((layout-work (-> (scratchpad-object bone-memory) work)))
    ;; "joints", which are stripped of their metadata qw and are 4qw matrices
    (set! (-> layout-work layout joint 0) (-> (scratchpad-object bone-memory) buffer 0 joint))
    (set! (-> layout-work layout joint 1) (-> (scratchpad-object bone-memory) buffer 1 joint))
    ;; "bones", which are 5 qw transform + scale separately
    (set! (-> layout-work layout bone 0) (-> (scratchpad-object bone-memory) buffer 0 bone))
    (set! (-> layout-work layout bone 1) (-> (scratchpad-object bone-memory) buffer 1 bone))
    ;; "output" pris-mtx.
    (set! (-> layout-work layout output 0) (-> (scratchpad-object bone-memory) buffer 0 output))
    (set! (-> layout-work layout output 1) (-> (scratchpad-object bone-memory) buffer 1 output))
    )
  ;; upload the bones vu0 block.
  ;; PC_PORT is a build-time constant, so this uses the compile-time #unless: the PS2-only
  ;; DMA code is then left out of PC builds entirely instead of being compiled and skipped at runtime.
  (#unless PC_PORT
    (let ((dma-buf *vu0-dma-list*))
      ;; rewind the buffer so the chain is built from its start
      (set! (-> dma-buf base) (-> dma-buf data))
      (set! (-> dma-buf end) (&-> dma-buf data-buffer (-> dma-buf allocated-length)))
      (dma-buffer-add-vu-function dma-buf bones-vu0-block 0)
      ;; append a 16-byte packet holding an 'end' dma tag, with its second 64-bit word zeroed
      (let ((pkt (the-as object (-> dma-buf base))))
        (set! (-> (the-as dma-packet pkt) dma) (new 'static 'dma-tag :id (dma-tag-id end)))
        (set! (-> (the-as (pointer uint64) pkt) 1) (the-as uint 0))
        (set! (-> dma-buf base) (&+ (the-as pointer pkt) 16))
        )
      (.sync.l)
      (dma-buffer-send-chain (the-as dma-bank-source #x10008000) dma-buf)
      )
    )
  0
  (none)
  )
;; note that the original game doesn't quite use a normal calling convention here, so we cheat and pass an extra arg.
;; see the C++ code for more details.
;; Argument order, as used by bones-mtx-calc-execute: output matrix area, joints, bones, number of bones,
;; and finally the extra argument, which carries the camera-rotation (or identity) matrix.
(def-mips2c bones-mtx-calc (function (inline-array pris-mtx) (inline-array joint) (inline-array bone) uint object none))
(defun matrix-*float! ((output matrix3) (input matrix3) (x float))
  "Write x times each of the 12 data floats of input into the matching slot of output.
   Works element by element, so output and input may be the same matrix. Returns output."
  (dotimes (slot 12)
    (let ((scaled (* x (-> input data slot))))
      (set! (-> output data slot) scaled)
      )
    )
  output
  )
(defun matrix*!-first-three ((arg0 matrix3) (arg1 matrix3) (arg2 matrix))
"Set dst = src1 * src2. It is okay for any arguments to be the same data.
This is a moderately efficient implementation."
;; In terms of the actual parameters: arg0 is dst, arg1 is src1, arg2 is src2.
;; Only quads 0-2 of arg1 and arg2 are read, and only quads 0-2 of arg0 are written,
;; so quad 3 of arg2 (a full matrix) does not take part.
;; Returns arg0.
(rlet ((acc :class vf)
(vf10 :class vf)
(vf11 :class vf)
(vf12 :class vf)
(vf14 :class vf)
(vf15 :class vf)
(vf16 :class vf)
(vf18 :class vf)
(vf19 :class vf)
(vf20 :class vf)
)
;; Every input quad is loaded before anything is stored; that is what makes aliased arguments safe.
(.lvf vf10 (&-> arg1 quad 0))
(.lvf vf14 (&-> arg2 quad 0))
(.lvf vf15 (&-> arg2 quad 1))
(.lvf vf16 (&-> arg2 quad 2))
(.lvf vf11 (&-> arg1 quad 1))
(.lvf vf12 (&-> arg1 quad 2))
;; result quad r = arg2.quad0 * arg1[r].x + arg2.quad1 * arg1[r].y + arg2.quad2 * arg1[r].z
;; quad 0
(.mul.x.vf acc vf14 vf10)
(.add.mul.y.vf acc vf15 vf10 acc)
(.add.mul.z.vf vf18 vf16 vf10 acc)
;; quad 1
(.mul.x.vf acc vf14 vf11)
(.add.mul.y.vf acc vf15 vf11 acc)
(.add.mul.z.vf vf19 vf16 vf11 acc)
;; quad 2
(.mul.x.vf acc vf14 vf12)
(.add.mul.y.vf acc vf15 vf12 acc)
(.add.mul.z.vf vf20 vf16 vf12 acc)
;; write the three result quads
(.svf (&-> arg0 quad 0) vf18)
(.svf (&-> arg0 quad 1) vf19)
(.svf (&-> arg0 quad 2) vf20)
arg0
)
)
(defun new-bones-mtx-calc ((output (inline-array pris-mtx)) (joints (inline-array joint)) (bones (inline-array bone)) (cam matrix) (count int))
  "Helper-function version of the bone matrix calculation.
   For each idx in [0, count - 1): combine joints[idx] bind-pose with bones[idx + 1] transform,
   derive the normal matrix from the result, then apply cam to both, writing output[idx + 1].
   output[0] is never written."
  (dotimes (idx (- count 1))
    (let* ((dst (-> output (+ idx 1)))
           (t-mat (-> dst t-mtx))
           (n-mat (-> dst n-mtx))
           (bind (-> joints idx bind-pose))
           (xform (-> bones (+ idx 1) transform))
           )
      ;; bind pose times bone transform (matrix*! takes the operands in this order)
      (matrix*! t-mat bind xform)
      ;; inverse transpose of the 3x3 part: cross products of row pairs ...
      (vector-cross! (-> n-mat vector 0) (-> t-mat vector 1) (-> t-mat vector 2))
      (vector-cross! (-> n-mat vector 1) (-> t-mat vector 2) (-> t-mat vector 0))
      (vector-cross! (-> n-mat vector 2) (-> t-mat vector 0) (-> t-mat vector 1))
      ;; ... divided by the dot of the first cross product with row 0
      (let ((inv-det (/ 1. (vector-dot (-> n-mat vector 0) (-> t-mat vector 0)))))
        (matrix-*float! n-mat n-mat inv-det)
        )
      ;; apply the camera matrix to both results
      (matrix*! t-mat t-mat cam)
      ;; NOTE(review): the original source tags this line "WRONG!!" without saying why - confirm
      ;; before relying on this version. new-bones-mtx-calc-asm performs the same 3-row product.
      (matrix*!-first-three n-mat n-mat cam)
      )
    )
  )
;; Expands to the two outer-product instructions that leave the result in out.
;; Used by new-bones-mtx-calc-asm in the same places where new-bones-mtx-calc calls vector-cross!.
;; Not hygienic: the expansion refers to a register named acc, so the call site must have one
;; bound (e.g. through rlet), and whatever it held is overwritten.
(defmacro .cross.vf (out a b)
`(begin
(.outer.product.a.vf acc ,a ,b)
(.outer.product.b.vf ,out ,b ,a acc)
)
)
;; Register-level version of new-bones-mtx-calc: same arguments and the same per-bone steps,
;; written out with vf instructions instead of calls to the matrix/vector helpers.
;; Iteration i reads joints[i] and bones[i+1] and writes output[i+1]; output[0] is never written.
;; Returns none.
(defun new-bones-mtx-calc-asm ((output (inline-array pris-mtx)) (joints (inline-array joint)) (bones (inline-array bone)) (cam matrix) (count int))
;; (declare (print-asm))
(dotimes (i (- count 1))
(let ((b (-> bones (+ i 1) transform))
(j (-> joints i bind-pose))
(out (-> output (+ i 1)))
)
;; acc must be bound under exactly this name: the .cross.vf macro expands to code that uses it.
(rlet (
(tmat0 :class vf)
(tmat1 :class vf)
(tmat2 :class vf)
(tmat3 :class vf)
(nmat0 :class vf)
(nmat1 :class vf)
(nmat2 :class vf)
(nmat3 :class vf)
(acc :class vf )
(vf0 :class vf )
(cam0 :class vf )
(cam1 :class vf )
(cam2 :class vf )
(cam3 :class vf )
)
(init-vf0-vector)
;; load bind-pose to tmat:
(.lvf tmat0 (&-> j quad 0))
(.lvf tmat1 (&-> j quad 1))
(.lvf tmat2 (&-> j quad 2))
(.lvf tmat3 (&-> j quad 3))
;; load bone to nmat
;; (nmat is only a temporary home for the bone transform here; it is overwritten below)
(.lvf nmat0 (&-> b quad 0))
(.lvf nmat1 (&-> b quad 1))
(.lvf nmat2 (&-> b quad 2))
(.lvf nmat3 (&-> b quad 3))
;; multiply, store in tmat
;; each tmat row r becomes nmat0*r.x + nmat1*r.y + nmat2*r.z + nmat3*r.w
(.mul.x.vf acc nmat0 tmat0)
(.add.mul.y.vf acc nmat1 tmat0 acc)
(.add.mul.z.vf acc nmat2 tmat0 acc)
(.add.mul.w.vf tmat0 nmat3 tmat0 acc)
(.mul.x.vf acc nmat0 tmat1)
(.add.mul.y.vf acc nmat1 tmat1 acc)
(.add.mul.z.vf acc nmat2 tmat1 acc)
(.add.mul.w.vf tmat1 nmat3 tmat1 acc)
(.mul.x.vf acc nmat0 tmat2)
(.add.mul.y.vf acc nmat1 tmat2 acc)
(.add.mul.z.vf acc nmat2 tmat2 acc)
(.add.mul.w.vf tmat2 nmat3 tmat2 acc)
(.mul.x.vf acc nmat0 tmat3)
(.add.mul.y.vf acc nmat1 tmat3 acc)
(.add.mul.z.vf acc nmat2 tmat3 acc)
(.add.mul.w.vf tmat3 nmat3 tmat3 acc)
;; compute inverse transpose, storing in nmat
;; same row pairings as the three vector-cross! calls in new-bones-mtx-calc
(.cross.vf nmat0 tmat1 tmat2)
(.cross.vf nmat1 tmat2 tmat0)
(.cross.vf nmat2 tmat0 tmat1)
;; dot nmat0 and tmat0
;; per-lane products first, then the y and z lanes are added in under mask #b1
;; (presumably the x lane only - confirm against the vf mask convention)
(.mul.vf acc nmat0 tmat0)
(.add.y.vf acc acc acc :mask #b1)
(.add.z.vf acc acc acc :mask #b1)
;; divide!
;; vf0 lane 3 divided by acc lane 0; presumably vf0.w is 1.0 after init-vf0-vector, which
;; makes this the reciprocal that new-bones-mtx-calc computes with (/ 1. ...) - confirm
(.div.vf acc vf0 acc :fsf #b11 :ftf #b0)
;; scale nmat:
(.mul.x.vf nmat0 nmat0 acc)
(.mul.x.vf nmat1 nmat1 acc)
(.mul.x.vf nmat2 nmat2 acc)
;; load camera
;; (does not depend on i, but is reloaded on every iteration)
(.lvf cam0 (&-> cam quad 0))
(.lvf cam1 (&-> cam quad 1))
(.lvf cam2 (&-> cam quad 2))
(.lvf cam3 (&-> cam quad 3))
;; multiply tmat by camera
(.mul.x.vf acc cam0 tmat0)
(.add.mul.y.vf acc cam1 tmat0 acc)
(.add.mul.z.vf acc cam2 tmat0 acc)
(.add.mul.w.vf tmat0 cam3 tmat0 acc)
(.mul.x.vf acc cam0 tmat1)
(.add.mul.y.vf acc cam1 tmat1 acc)
(.add.mul.z.vf acc cam2 tmat1 acc)
(.add.mul.w.vf tmat1 cam3 tmat1 acc)
(.mul.x.vf acc cam0 tmat2)
(.add.mul.y.vf acc cam1 tmat2 acc)
(.add.mul.z.vf acc cam2 tmat2 acc)
(.add.mul.w.vf tmat2 cam3 tmat2 acc)
(.mul.x.vf acc cam0 tmat3)
(.add.mul.y.vf acc cam1 tmat3 acc)
(.add.mul.z.vf acc cam2 tmat3 acc)
(.add.mul.w.vf tmat3 cam3 tmat3 acc)
;; store tmat
(.svf (&-> out t-mtx quad 0) tmat0)
(.svf (&-> out t-mtx quad 1) tmat1)
(.svf (&-> out t-mtx quad 2) tmat2)
(.svf (&-> out t-mtx quad 3) tmat3)
;; multiply nmat
;; three-term product only: cam3 is not used for the normal matrix
(.mul.x.vf acc cam0 nmat0)
(.add.mul.y.vf acc cam1 nmat0 acc)
(.add.mul.z.vf nmat0 cam2 nmat0 acc)
(.mul.x.vf acc cam0 nmat1)
(.add.mul.y.vf acc cam1 nmat1 acc)
(.add.mul.z.vf nmat1 cam2 nmat1 acc)
(.mul.x.vf acc cam0 nmat2)
(.add.mul.y.vf acc cam1 nmat2 acc)
(.add.mul.z.vf nmat2 cam2 nmat2 acc)
;; store nmat
(.svf (&-> out n-mtx quad 0) nmat0)
(.svf (&-> out n-mtx quad 1) nmat1)
(.svf (&-> out n-mtx quad 2) nmat2)
)
)
)
(none)
)
;; When true, bones-mtx-calc-execute calls new-bones-mtx-calc-asm; otherwise it calls the mips2c bones-mtx-calc.
(define *use-new-bones* #t)
;; When true, bones-mtx-calc-execute prints *num-bones* to *stdcon* after processing the list.
(define *display-bone-stats* #f)
;; Running total of num-bones over the entries handled by the latest bones-mtx-calc-execute call.
(define *num-bones* 0)
;; Walks *bone-calculation-list* from its first entry, computes the output matrices for each
;; entry, optionally writes a modified copy into the entry's ripple-area, and finally empties
;; the list with bone-list-init. The whole pass is wrapped in the 'bones profiler/perf-stat bucket.
(defun bones-mtx-calc-execute ()
"Execute all bone matrix calculations."
(rlet ((vf1 :class vf)
(vf4 :class vf)
(vf5 :class vf)
(vf6 :class vf)
(vf7 :class vf)
(vf8 :class vf)
)
(with-profiler 'bones *profile-bones-color*
(reset! (-> *perf-stats* data (perf-stat-bucket bones)))
;; set up layout in scratchpad (again)
;; (same six assignments as in bones-init)
(let ((v1-24 (-> (scratchpad-object bone-memory) work)))
(set! (-> v1-24 layout joint 0) (-> (scratchpad-object bone-memory) buffer 0 joint))
(set! (-> v1-24 layout joint 1) (-> (scratchpad-object bone-memory) buffer 1 joint))
(set! (-> v1-24 layout bone 0) (-> (scratchpad-object bone-memory) buffer 0 bone))
(set! (-> v1-24 layout bone 1) (-> (scratchpad-object bone-memory) buffer 1 bone))
(set! (-> v1-24 layout output 0) (-> (scratchpad-object bone-memory) buffer 0 output))
(set! (-> v1-24 layout output 1) (-> (scratchpad-object bone-memory) buffer 1 output))
)
;; set sqwc to upload joints. This will skip the 1 qw of metadata and just grab the joint matrix.
;; (set! (-> (the-as dma-bank-control #x1000e000) sqwc) (new 'static 'dma-sqwc :sqwc #x1 :tqwc #x4))
;; loop over bone calculations:
;; gp-1 = identity, s5-1 = camera rotation, s4-1 = current list entry (0 terminates the list)
(let* ((v1-26 *bone-calculation-list*)
(gp-1 *identity-matrix*)
(s5-1 (-> *math-camera* camera-rot))
(s4-1 (-> v1-26 first))
)
(set! *num-bones* 0)
(while (nonzero? s4-1) ;; loop
;; pick correct matrix. The pris-mtx includes camera rotation, so all that's needed in the final
;; renderers is perspective. (unless you set no-cam-rot)
(let ((v1-29 (if (logtest? (-> s4-1 flags) (bone-calc-flags no-cam-rot))
gp-1
s5-1
)
)
)
;; calculate!
;; note the two callees take the matrix and the bone count in opposite order
(if *use-new-bones*
(new-bones-mtx-calc-asm (-> s4-1 matrix-area) (-> s4-1 joints) (-> s4-1 bones) v1-29 (the int (-> s4-1 num-bones)))
(bones-mtx-calc
(-> s4-1 matrix-area)
(-> s4-1 joints)
(-> s4-1 bones)
(-> s4-1 num-bones)
v1-29 ;; hack, to pass matrix to bones-mtx-calc in a better way.
)
)
)
(+! *num-bones* (-> s4-1 num-bones))
;; there is an optional post-processing step for ripple.
;; Copies each pris-mtx from matrix-area to ripple-area (128 bytes per entry), with some
;; quads changed using ripple-vec (vf1):
;;   t-mtx quad 1 is scaled by vf1.y, n-mtx quads 0 and 2 are scaled by vf1.z,
;;   trans has (t-mtx quad 1 * vf1.x) subtracted, and t-mtx quads 0 and 2 plus n-mtx quad 1 are copied as-is.
;; NOTE(review): the loop tests its counter after the body, so it always runs at least once;
;; presumably an entry with this flag never has num-bones == 0 - confirm.
(when (logtest? (-> s4-1 flags) (bone-calc-flags write-ripple-data))
(let* ((a2-1 s4-1)
(v1-34 (-> a2-1 num-bones))
(a0-26 (the-as pris-mtx (-> a2-1 matrix-area)))
(a1-11 (the-as pris-mtx (-> a2-1 ripple-area)))
)
(.lvf vf1 (&-> a2-1 ripple-vec quad))
(label cfg-15)
(.lvf vf5 (&-> a0-26 t-mtx quad 1))
(.lvf vf6 (&-> a0-26 t-mtx trans quad))
(.lvf vf7 (&-> a0-26 n-mtx quad 0))
(.lvf vf8 (&-> a0-26 n-mtx quad 2))
(.mul.x.vf vf4 vf5 vf1)
(.mul.y.vf vf5 vf5 vf1)
(let ((a3-1 (-> a0-26 t-mtx quad 0)))
(.mul.z.vf vf7 vf7 vf1)
(let ((a2-2 (-> a0-26 t-mtx quad 2)))
(.mul.z.vf vf8 vf8 vf1)
(set! (-> a1-11 t-mtx quad 0) a3-1)
(let ((a3-2 (-> a0-26 n-mtx quad 1)))
(.sub.vf vf6 vf6 vf4)
(set! (-> a1-11 t-mtx quad 2) a2-2)
(.svf (&-> a1-11 t-mtx quad 1) vf5)
(set! (-> a1-11 n-mtx quad 1) a3-2)
)
)
)
(+! v1-34 -1)
(.svf (&-> a1-11 n-mtx quad 0) vf7)
;; (.addiu a0-26 a0-26 128)
(&+! a0-26 128)
(.svf (&-> a1-11 t-mtx trans quad) vf6)
;; (.addiu a1-11 a1-11 128)
(&+! a1-11 128)
;; a1-11 already points at the next entry here, so the delay-slot store goes two vectors
;; back from it - presumably n-mtx quad 2 of the entry just written (vf8 came from the
;; source's n-mtx quad 2); confirm against the pris-mtx layout.
(b! (nonzero? v1-34) cfg-15 :delay (.svf (&-> (the (inline-array vector) a1-11) -2 quad) vf8))
)
0
)
;; advance to the next queued calculation
(set! s4-1 (-> s4-1 next))
)
)
(when *display-bone-stats*
(format *stdcon* "num-bones: ~D~%" *num-bones*)
)
;; reset sqwc
;; (set! (-> (the-as dma-bank-control #x1000e000) sqwc) (new 'static 'dma-sqwc :sqwc #x1 :tqwc #x1))
;; clear bone list
(bone-list-init)
(read! (-> *perf-stats* data (perf-stat-bucket bones)))
)
0
(none)
)
)
(defun-debug dump-qword ((arg0 qword))
  "Debug print of one qword: the value of arg0 itself, then data entries 0 through 3, all as 8-digit hex."
  (format 0 "~8,'0X: ~8,'0X ~8,'0X ~8,'0X ~8,'0X~%"
          arg0
          (-> arg0 data 0)
          (-> arg0 data 1)
          (-> arg0 data 2)
          (-> arg0 data 3)
          )
  (none)
  )
(defun-debug dump-mem ((arg0 pointer) (arg1 int))
  "Debug dump of arg1 consecutive 16-byte rows starting at arg0.
   Each output line shows the row address and its four 32-bit words in hex, then the same
   four words again through the ~F directive."
  (dotimes (row arg1)
    ;; address of this row: 4 words of 4 bytes each per row
    (let ((words (the-as (pointer uint32) (&+ arg0 (* (* row 4) 4)))))
      ;; no newline here: the second format call finishes the line
      (format 0 "~8,'0X: ~8,'0X ~8,'0X ~8,'0X ~8,'0X"
              words
              (-> words 0)
              (-> words 1)
              (-> words 2)
              (-> words 3)
              )
      ;; NOTE(review): the values are loaded as uint32 yet printed with ~F; presumably format
      ;; reinterprets the raw 32 bits as a float - confirm.
      (format 0 " ~F ~F ~F ~F ~%"
              (-> words 0)
              (-> words 1)
              (-> words 2)
              (-> words 3)
              )
      )
    )
  (none)
  )
;; Debug hook with an empty body: does nothing and returns none.
(defun-debug bones-debug ()
0
(none)
)
(defun-debug dump-bone-mem ()
  "Debug dump of six fixed scratchpad regions (joints, bones and output, for buffers 0 and 1) via dump-mem.
   Offsets are hard-coded relative to #x70000000; row counts are 64, 80 and 128 respectively."
  (let ((spad #x70000000))
    ;; buffer 0
    (format 0 "== joints 0 ===========~%")
    (dump-mem (the-as pointer (+ spad 80)) 64)
    (format 0 "== bones 0 ============~%")
    (dump-mem (the-as pointer (+ spad 1104)) 80)
    (format 0 "== output 0 ===========~%")
    (dump-mem (the-as pointer (+ spad 2384)) 128)
    (format 0 "~%~%~%")
    ;; buffer 1
    (format 0 "== joints 1 ===========~%")
    (dump-mem (the-as pointer (+ spad 4432)) 64)
    (format 0 "== bones 1 ============~%")
    (dump-mem (the-as pointer (+ spad 5456)) 80)
    (format 0 "== output 1 ===========~%")
    (dump-mem (the-as pointer (+ spad 6736)) 128)
    (format 0 "========================~%~%")
    )
  (none)
  )