diff --git a/Makefile b/Makefile index e18b72c..b9b6af4 100644 --- a/Makefile +++ b/Makefile @@ -50,7 +50,10 @@ RENDERERS = \ general_tri \ general \ indexed \ - scei + indexed_constant_color \ + indexed_no_lights_pvc \ + scei \ + fast_no_lights_pvc_tri EE_OBJS += $(addsuffix .vo, $(addprefix vu1/, $(RENDERERS))) @@ -66,6 +69,7 @@ install: all cp -f $(EE_LIB) $(PS2SDK)/ports/lib clean: + rm -rf ./GS_DUMP/hard/* rm -f $(EE_OBJS_LIB) $(EE_OBJS) $(EE_BIN) $(EE_LIB) realclean: clean @@ -76,23 +80,86 @@ realclean: clean include $(PS2SDK)/Defs.make include $(PS2SDK)/samples/Makefile.eeglobal +.PHONY: one +one: + @test -n "$(VCL)" || (echo "Usage: make $@ VCL=path/to/foo.vcl"; exit 1) + $(MAKE) $(VCL:.vcl=_vcl.vsm) $(VCL:.vcl=.vo) + +## dvp-as origin in ps2dev toolchain: https://github.com/ps2dev/ps2toolchain/blob/master/scripts/001-dvp.sh +## Build .vo (VU object) from a compiled .vsm %.vo: %_vcl.vsm dvp-as -o $@ $< +# VCL (Vector Command Language): https://ps2linux.no-ip.info/playstation2-linux.com/projects/vcl.html +# for documentation download the x86 or win32 tar above and read the VCL_User_Manual_E_v1.4_1.pdf +# more resources on vsm: http://lukasz.dk/files/vu-instruction-manual.pdf %_vcl.vsm: %_pp4.vcl vcl -o$@ $< -%indexed_pp4.vcl: %indexed_pp3.vcl - cat $< | cc -E -P -imacros vu1/vu1_mem_indexed.h -o $@ - - -%_pp4.vcl: %_pp3.vcl - cat $< | cc -E -P -imacros vu1/vu1_mem_linear.h -o $@ - - +# GCC / CPP flags (-E, -P, -imacros): https://gcc.gnu.org/onlinedocs/cpp/Invocation.html#Invocation +# -E = preprocess only, -P = strip #line, -imacros includes macros without writing #include +vu1/%_pp4.vcl: vu1/%_pp3.vcl + @hdr=vu1/vu1_mem_linear.h; \ + case "$*" in \ + indexed|indexed_*) hdr=vu1/vu1_mem_indexed.h ;; \ + esac; \ + cat $< | cc -E -P -imacros $$hdr -o $@ - + +#TODO: remove this step? This could be covered simply from writing correct vcl code... unless intending to allow new and old syntax? 
+# you can standardize syntax by using ".syntax old" or ".syntax new" or by passing `-n` to VCL for "new" and writing sources +# accordingly, it might be better to allow for correcting towards that to avoid confusion... %_pp3.vcl: %_pp2.vcl cat $< | sed 's/\[\([0-9]\)\]/_\1/g ; s/\[\([w-zW-Z]\)\]/\1/g' - > $@ +# Expand assembly-style macros and .include with GASP +# -c ';' uses ';' as the comment char; -Ivu1 resolves local .include files. +# GASP (GNU assembler preprocessor) manpage: https://manpages.debian.org/unstable/binutils-m68hc1x/gasp.1.en.html %_pp2.vcl: %_pp1.vcl gasp -c ';' -Ivu1 -o $@ $< +# This step normalizes sources for GASP by removing C preprocessor directives (#include/#define), +# and then fixes local .include paths so GASP can resolve them relative to the source dir. +# TODO: if the .vcl files ALREADY avoid #include/#define and only use .include/.macro etc., +# could we wire %.vcl -> %_pp2.vcl directly and drop this rule? %_pp1.vcl: %.vcl cat $< | sed 's/#include[ ]\+.\+// ; s/#define[ ]\+.\+// ; s|\(\.include[ ]\+\)"\([^/].\+\)"|\1"$(<D)/\2"|' - > $@ + +# ---- build examples and create ./bin launchers --------------------- + +SHELL := /bin/bash +EXAMPLES_DIR := examples +BIN_DIR := bin +PCSX2_BIN ?= pcsx2 +PCSX2_FLAGS ?= -nogui -batch -fastboot -earlyconsolelog -logfile /dev/null + +.PHONY: examples clean-examples + +examples: + mkdir -p $(BIN_DIR) + # build each example (skip shared_code) + find $(EXAMPLES_DIR) -type f -name Makefile ! -path '*/shared_code/*' -print0 \ + | while IFS= read -r -d '' mf; do \ + dir=$$(dirname "$$mf"); \ + echo "==> make -C $$dir"; \ + $(MAKE) -C "$$dir" || exit 1; \ + done; \ + # create launchers for every .elf (absolute path!) 
+ find $(EXAMPLES_DIR) -type f -name '*.elf' -print0 \ + | while IFS= read -r -d '' elf; do \ + name=$$(basename "$${elf%.elf}"); \ + abs=$$(readlink -f "$$elf"); \ + echo "==> writing $(BIN_DIR)/$$name -> $$abs"; \ + printf '#!/usr/bin/env bash\n%s %s -elf %q "$$@"\n' \ + '$(PCSX2_BIN)' '$(PCSX2_FLAGS)' "$$abs" > "$(BIN_DIR)/$$name"; \ + chmod +x "$(BIN_DIR)/$$name"; \ + done + +clean-examples: + rm -rf ./GS_DUMP/hard/* + rm -rf $(BIN_DIR) + find $(EXAMPLES_DIR) -type f -name Makefile ! -path '*/shared_code/*' -print0 \ + | while IFS= read -r -d '' mf; do \ + dir=$$(dirname "$$mf"); \ + echo "==> clean $$dir"; \ + $(MAKE) -C "$$dir" clean || true; \ + done diff --git a/README.md b/README.md index b91dbc8..204c2d5 100644 --- a/README.md +++ b/README.md @@ -152,6 +152,12 @@ Bug reports should be submitted to the appropriate homepage, which hosts a bug t ## Changelog +### 0.3.x (beginning revival attempts) +- Added documentation to the Makefile with references for using vcl and the other tools in 2025 (also updated some vcl sources for the potential deprecation of vcl preprocess step 2): + `cat $< | sed 's/\[\([0-9]\)\]/_\1/g ; s/\[\([w-zW-Z]\)\]/\1/g' - > $@ ` +- Added a fast, no-lights, per-vertex-color renderer with pvc_box examples (lit with `general_pv_diff_tri` and unlit with `fast_no_lights_pvc_tri`) -- WIP, could be very slow for some reason +- Began testing against https://github.com/raylib4Consoles/raylib4PlayStation2 integration (will be useful for testing expected OpenGL 1.1 behavior) + ### 0.3 - Can now define custom prim types and attributes, tying them to custom renderers and override default renderers. - Lots of bug fixes! 
diff --git a/examples/nehe/lesson03/lesson3.cpp b/examples/nehe/lesson03/lesson3.cpp index b4a3573..d178397 100644 --- a/examples/nehe/lesson03/lesson3.cpp +++ b/examples/nehe/lesson03/lesson3.cpp @@ -24,12 +24,6 @@ void init(GLvoid) // Create Some Everyday Functions glEnable(GL_DEPTH_TEST); // Enables Depth Testing glDepthFunc(GL_LEQUAL); // The Type Of Depth Testing To Do glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_NICEST); - - // ps2gl needs lighting + color_material for per-vertex colors - glEnable(GL_COLOR_MATERIAL); - glColorMaterial(GL_FRONT_AND_BACK, GL_DIFFUSE); - glEnable(GL_LIGHTING); - glEnable(GL_LIGHT0); } void display(void) // Create The Display Function diff --git a/examples/nehe/lesson04/lesson4.cpp b/examples/nehe/lesson04/lesson4.cpp index 2b44825..756ea46 100644 --- a/examples/nehe/lesson04/lesson4.cpp +++ b/examples/nehe/lesson04/lesson4.cpp @@ -27,12 +27,6 @@ void init(GLvoid) // Create Some Everyday Functions glEnable(GL_DEPTH_TEST); // Enables Depth Testing glDepthFunc(GL_LEQUAL); // The Type Of Depth Testing To Do glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_NICEST); - - // ps2gl needs lighting + color_material for per-vertex colors - glEnable(GL_COLOR_MATERIAL); - glColorMaterial(GL_FRONT_AND_BACK, GL_DIFFUSE); - glEnable(GL_LIGHTING); - glEnable(GL_LIGHT0); } void idle(void) { diff --git a/examples/nehe/lesson05/lesson5.cpp b/examples/nehe/lesson05/lesson5.cpp index 7edaa08..ea69e05 100644 --- a/examples/nehe/lesson05/lesson5.cpp +++ b/examples/nehe/lesson05/lesson5.cpp @@ -27,12 +27,6 @@ void InitGL(GLvoid) // Create Some Everyday Functions glEnable(GL_DEPTH_TEST); // Enables Depth Testing glDepthFunc(GL_LEQUAL); // The Type Of Depth Testing To Do glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_NICEST); - - // ps2gl needs lighting + color_material for per-vertex colors - glEnable(GL_COLOR_MATERIAL); - glColorMaterial(GL_FRONT_AND_BACK, GL_DIFFUSE); - glEnable(GL_LIGHTING); - glEnable(GL_LIGHT0); } void idle(void) { @@ -41,6 +35,10 @@ void 
idle(void) { void display(void) // Create The Display Function { + glDisable(GL_COLOR_MATERIAL); + glDisable(GL_LIGHTING); + glDisable(GL_LIGHT0); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear Screen And Depth Buffer glLoadIdentity(); // Reset The Current Modelview Matrix glPushMatrix(); @@ -73,10 +71,20 @@ void display(void) // Create The Display Fu glVertex3f(-1.0f, -1.0f, 1.0f); // Right Of Triangle (Left) glEnd(); // Finished Drawing The Triangle + + glLoadIdentity(); // Reset The Current Modelview Matrix glTranslatef(1.5f, 0.0f, -6.0f); // Move Right 1.5 Units And Into The Screen 6.0 glRotatef(rquad, 1.0f, 0.0f, 0.0f); // Rotate The Quad On The X axis glColor3f(0.5f, 0.5f, 1.0f); // Set The Color To Blue One Time Only + // TODO: next make a fast no lights for QUADS + // for now its interesting to see the lighting based one for quads only here: + glEnable(GL_COLOR_MATERIAL); + glColorMaterial(GL_FRONT_AND_BACK, GL_DIFFUSE); + glEnable(GL_LIGHTING); + glEnable(GL_LIGHT0); + constexpr float default_normal_direction_alignment[4] = {0.f, 0.f, 1.f, 0.f}; + glLightfv(GL_LIGHT0, GL_POSITION, default_normal_direction_alignment); glBegin(GL_QUADS); // Draw A Quad glColor3f(0.0f, 1.0f, 0.0f); // Set The Color To Blue glVertex3f(1.0f, 1.0f, -1.0f); // Top Right Of The Quad (Top) diff --git a/examples/pvc_box/Makefile b/examples/pvc_box/Makefile new file mode 100644 index 0000000..208cd0d --- /dev/null +++ b/examples/pvc_box/Makefile @@ -0,0 +1,37 @@ +EE_BIN = pvc_box.elf +EE_CFLAGS := -I$(PS2SDK)/ports/include -I../shared_code/ $(EE_CFLAGS) +EE_CXXFLAGS := -I$(PS2SDK)/ports/include -I../shared_code/ $(EE_CXXFLAGS) +#EE_OBJS = lit_pvc_box.o ../shared_code/text_stuff.o +EE_OBJS = unlit_pvc_box.o ../shared_code/text_stuff.o +EE_LDFLAGS += -L$(PS2SDK)/ports/lib +EE_LIBS = -lps2glut -lps2gl -lps2stuff -lpad -ldma + +ifeq ($(DEBUG), 1) + EE_CFLAGS += -D_DEBUG + EE_CXXFLAGS += -D_DEBUG +endif + +# Disabling warnings +WARNING_FLAGS = -Wno-strict-aliasing 
-Wno-conversion-null + +# VU0 code is broken so disable for now +EE_CFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM +EE_CXXFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM + +all: $(EE_BIN) + $(EE_STRIP) --strip-all $(EE_BIN) + +clean: + rm -f $(EE_BIN) $(EE_OBJS) + +run: $(EE_BIN) + ps2client -h 192.168.1.10 execee host:$(EE_BIN) + +reset: + ps2client -h 192.168.1.10 reset + +sim: $(EE_BIN) + PCSX2 --elf=$(CURDIR)/$(EE_BIN) + +include $(PS2SDK)/samples/Makefile.pref +include $(PS2SDK)/samples/Makefile.eeglobal_cpp diff --git a/examples/pvc_box/lit_pvc_box.cpp b/examples/pvc_box/lit_pvc_box.cpp new file mode 100644 index 0000000..eaaf065 --- /dev/null +++ b/examples/pvc_box/lit_pvc_box.cpp @@ -0,0 +1,150 @@ +#include +#include +#include +#include +#include +#include "ps2gl/renderermanager.h" +#include + +void init_lights_and_color(); +void display(); +void cube_position_and_rotation(); +void draw_rgb_cube(); +static void colored_vertex(float r, float g, float b, float x, float y, float z); +void reshape(int width, int height); +void perspective(float fov, float aspect, float nearClip, float farClip); + +static float cube_spin_angle = 0.0f; +static float cube_z = -6.0f, cube_forward_rotation = -18.0f; + +int main(int argc, char** argv) +{ + glutInit(&argc, argv); + glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE); + glutInitWindowSize(640, 448); + glutCreateWindow("RGB Cube"); + init_lights_and_color(); + glutDisplayFunc(display); + glutReshapeFunc(reshape); + glutMainLoop(); + return 0; +} + +void init_lights_and_color() +{ + glEnable(GL_COLOR_MATERIAL); + glColorMaterial(GL_FRONT, GL_DIFFUSE); + glEnable(GL_LIGHTING); + glEnable(GL_LIGHT0); +} + +void display() +{ + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + mDebugPrint("[display() function] Renderer = %s\n", pglGetCurRendererName()); + cube_spin_angle += 0.2f; + draw_rgb_cube(); + glLoadIdentity(); +} + +void draw_rgb_cube() +{ + cube_position_and_rotation(); + //See gmanager.cpp + constexpr float 
default_normal_direction_alignment[4] = {0.f, 0.f, 1.f, 0.f}; + glLightfv(GL_LIGHT0, GL_POSITION, default_normal_direction_alignment); + glBegin(GL_TRIANGLES); + { + // +Z (front): A(1,1,1) B(-1,1,1) C(-1,-1,1) D(1,-1,1) + // tri1: (A,B,C) = (R,G,B) + colored_vertex(1, 0, 0, 1, 1, 1); + colored_vertex(0, 1, 0,-1, 1, 1); + colored_vertex(0, 0, 1, -1, -1, 1); + // tri2: (A,C,D) = (R,B,G) + colored_vertex(1, 0, 0, 1, 1, 1); + colored_vertex(0, 0, 1, -1, -1, 1); + colored_vertex(0, 1, 0, 1, -1, 1); + + // -Z (back): A(1,-1,-1) B(-1,-1,-1) C(-1,1,-1) D(1,1,-1) + // tri1: (A,B,C) = (R,G,B) + colored_vertex(1, 0, 0, 1, -1, -1); + colored_vertex(0, 1, 0,-1, -1, -1); + colored_vertex(0, 0, 1,-1, 1, -1); + // tri2: (A,C,D) = (R,B,G) + colored_vertex(1, 0, 0, 1, -1, -1); + colored_vertex(0, 0, 1,-1, 1, -1); + colored_vertex(0, 1, 0, 1, 1, -1); + + // +Y (top): A(1,1,-1) B(-1,1,-1) C(-1,1,1) D(1,1,1) + // tri1: (A,B,C) = (R,G,B) + colored_vertex(1, 0, 0, 1, 1, -1); + colored_vertex(0, 1, 0,-1, 1, -1); + colored_vertex(0, 0, 1,-1, 1, 1); + // tri2: (A,C,D) = (R,B,G) + colored_vertex(1, 0, 0, 1, 1, -1); + colored_vertex(0, 0, 1,-1, 1, 1); + colored_vertex(0, 1, 0, 1, 1, 1); + + // -Y (bottom): A(1,-1,1) B(-1,-1,1) C(-1,-1,-1) D(1,-1,-1) + // tri1: (A,B,C) = (R,G,B) + colored_vertex(1, 0, 0, 1, -1, 1); + colored_vertex(0, 1, 0,-1, -1, 1); + colored_vertex(0, 0, 1,-1, -1, -1); + // tri2: (A,C,D) = (R,B,G) + colored_vertex(1, 0, 0, 1, -1, 1); + colored_vertex(0, 0, 1,-1, -1, -1); + colored_vertex(0, 1, 0, 1, -1, -1); + + // -X (left): A(-1,1,1) B(-1,1,-1) C(-1,-1,-1) D(-1,-1,1) + // tri1: (A,B,C) = (R,G,B) + colored_vertex(1, 0, 0,-1, 1, 1); + colored_vertex(0, 1, 0,-1, 1, -1); + colored_vertex(0, 0, 1,-1, -1, -1); + // tri2: (A,C,D) = (R,B,G) + colored_vertex(1, 0, 0,-1, 1, 1); + colored_vertex(0, 0, 1,-1, -1, -1); + colored_vertex(0, 1, 0,-1, -1, 1); + + // +X (right): A(1,1,-1) B(1,1,1) C(1,-1,1) D(1,-1,-1) + // tri1: (A,B,C) = (R,G,B) + colored_vertex(1, 0, 0, 1, 1, -1); + 
colored_vertex(0, 1, 0, 1, 1, 1); + colored_vertex(0, 0, 1, 1, -1, 1); + // tri2: (A,C,D) = (R,B,G) + colored_vertex(1, 0, 0, 1, 1, -1); + colored_vertex(0, 0, 1, 1, -1, 1); + colored_vertex(0, 1, 0, 1, -1, -1); + } + glEnd(); +} + +void cube_position_and_rotation() +{ + glTranslatef(0.0f, 0.0f, cube_z); + glRotatef(cube_forward_rotation, -1, 0, 0); + glRotatef(cube_spin_angle, 0.0f, 1.0f, 0.0f); +} + +static void colored_vertex(const float r, const float g, const float b, const float x, const float y, const float z) +{ + glColor3f(r, g, b); + glVertex3f(x, y, z); +} + +void reshape(const int width, int height) +{ + if (height == 0) + height = 1; + glViewport(0, 0, width, height); + glMatrixMode(GL_PROJECTION); + perspective(40.0f, (float)width / (float)height, 0.1f, 4000.0f); + glMatrixMode(GL_MODELVIEW); +} + +void perspective(float fov, const float aspect, const float nearClip, const float farClip) +{ + fov *= 3.141592654f / 180.0f; + const float height = 2.0f * nearClip * tanf(fov / 2.0f); + const float width = height * aspect; + glFrustum(-width / 2.0f, width / 2.0f, -height / 2.0f, height / 2.0f, nearClip, farClip); +} \ No newline at end of file diff --git a/examples/pvc_box/unlit_pvc_box.cpp b/examples/pvc_box/unlit_pvc_box.cpp new file mode 100644 index 0000000..6c6a121 --- /dev/null +++ b/examples/pvc_box/unlit_pvc_box.cpp @@ -0,0 +1,137 @@ +#include +#include +#include +#include +#include +#include +#include "ps2gl/renderermanager.h" +#include + +void init_lights_and_color(); +void display(); +void cube_position_and_rotation(); +void draw_rgb_cube(); +static void colored_vertex(float r, float g, float b, float nx, float ny, float nz, float x, float y, float z); +void reshape(int width, int height); +void perspective(float fov, float aspect, float nearClip, float farClip); + +static float cube_spin_angle = 0.0f; +static float cube_z = -6.0f, cube_forward_rotation = -18.0f; + +int main(int argc, char** argv) +{ + glutInit(&argc, argv); + 
glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);// | GLUT_DEPTH); + glutInitWindowSize(640, 448); + glutCreateWindow("RGB Cube"); + init_lights_and_color(); + mDebugPrint("Renderer = %s\n", pglGetCurRendererName()); + glutDisplayFunc(display); + glutReshapeFunc(reshape); + glutMainLoop(); + return 0; +} + +void init_lights_and_color() +{ + glDisable(GL_LIGHTING); + glDisable(GL_COLOR_MATERIAL); +} + +void display() +{ + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); //REQUIRED + cube_spin_angle += 0.2f; + draw_rgb_cube(); + glLoadIdentity(); + glutSwapBuffers(); +} + +void draw_rgb_cube() +{ + cube_position_and_rotation(); + glBegin(GL_TRIANGLES); + { + // +Z (front): + colored_vertex(1, 0, 0, 0, 0, 1, 1, 1, 1); + colored_vertex(0, 1, 0, 0, 0, 1, -1, 1, 1); + colored_vertex(0, 0, 1, 0, 0, 1, -1, -1, 1); + colored_vertex(1, 0, 0, 0, 0, 1, 1, 1, 1); + colored_vertex(0, 0, 1, 0, 0, 1, -1, -1, 1); + colored_vertex(0, 1, 0, 0, 0, 1, 1, -1, 1); + + // -Z (back): + colored_vertex(1, 0, 0, 0, 0, -1, 1, -1, -1); + colored_vertex(0, 1, 0, 0, 0, -1, -1, -1, -1); + colored_vertex(0, 0, 1, 0, 0, -1, -1, 1, -1); + colored_vertex(1, 0, 0, 0, 0, -1, 1, -1, -1); + colored_vertex(0, 0, 1, 0, 0, -1, -1, 1, -1); + colored_vertex(0, 1, 0, 0, 0, -1, 1, 1, -1); + + // +Y (top): + colored_vertex(1, 0, 0, 0, 1, 0, 1, 1, -1); + colored_vertex(0, 1, 0, 0, 1, 0, -1, 1, -1); + colored_vertex(0, 0, 1, 0, 1, 0, -1, 1, 1); + colored_vertex(1, 0, 0, 0, 1, 0, 1, 1, -1); + colored_vertex(0, 0, 1, 0, 1, 0, -1, 1, 1); + colored_vertex(0, 1, 0, 0, 1, 0, 1, 1, 1); + + // -Y (bottom) + colored_vertex(1, 0, 0, 0, -1, 0, 1, -1, 1); + colored_vertex(0, 1, 0, 0, -1, 0, -1, -1, 1); + colored_vertex(0, 0, 1, 0, -1, 0, -1, -1, -1); + colored_vertex(1, 0, 0, 0, -1, 0, 1, -1, 1); + colored_vertex(0, 0, 1, 0, -1, 0, -1, -1, -1); + colored_vertex(0, 1, 0, 0, -1, 0, 1, -1, -1); + + // -X (left) + colored_vertex(1, 0, 0, -1, 0, 0, -1, 1, 1); + colored_vertex(0, 1, 0, -1, 0, 0, -1, 1, -1); + colored_vertex(0, 0, 1, 
-1, 0, 0, -1, -1, -1); + colored_vertex(1, 0, 0, -1, 0, 0, -1, 1, 1); + colored_vertex(0, 0, 1, -1, 0, 0, -1, -1, -1); + colored_vertex(0, 1, 0, -1, 0, 0, -1, -1, 1); + + // +X (right) + colored_vertex(1, 0, 0, 1, 0, 0, 1, 1, -1); + colored_vertex(0, 1, 0, 1, 0, 0, 1, 1, 1); + colored_vertex(0, 0, 1, 1, 0, 0, 1, -1, 1); + colored_vertex(1, 0, 0, 1, 0, 0, 1, 1, -1); + colored_vertex(0, 0, 1, 1, 0, 0, 1, -1, 1); + colored_vertex(0, 1, 0, 1, 0, 0, 1, -1, -1); + } + glEnd(); +} + +void cube_position_and_rotation() +{ + glTranslatef(0.0f, 0.0f, cube_z); + glRotatef(cube_forward_rotation, -1, 0, 0); + glRotatef(cube_spin_angle, 0.0f, 1.0f, 0.0f); +} + +static void colored_vertex(const float r, const float g, const float b, const float nx, const float ny, const float nz, + const float x, const float y, const float z) +{ + glColor3f(r, g, b); + // glNormal3f(nx, ny, nz); //for sending in ignored normals when we want to + glVertex3f(x, y, z); +} + +void reshape(const int width, int height) +{ + if (height == 0) + height = 1; + glViewport(0, 0, width, height); + glMatrixMode(GL_PROJECTION); //REQUIRED (or else black screen) + perspective(40.0f, (float)width / (float)height, 0.1f, 4000.0f); + glMatrixMode(GL_MODELVIEW); //REQUIRED (or else black screen) +} + +void perspective(float fov, const float aspect, const float nearClip, const float farClip) +{ + fov *= 3.141592654f / 180.0f; + const float height = 2.0f * nearClip * tanf(fov / 2.0f); + const float width = height * aspect; + glFrustum(-width / 2.0f, width / 2.0f, -height / 2.0f, height / 2.0f, nearClip, farClip); +} \ No newline at end of file diff --git a/examples/tricked_out/billboard_renderer.cpp b/examples/tricked_out/billboard_renderer.cpp index 1cab68f..e439547 100644 --- a/examples/tricked_out/billboard_renderer.cpp +++ b/examples/tricked_out/billboard_renderer.cpp @@ -143,7 +143,7 @@ void CBillboardRenderer::InitContext(GLenum primType, uint32_t rcChanges, bool u packet += cpu_vec_4(0, 0, 1, 0); // set the 
color, max is 128 because this is unity when texture mapping is enabled - cpu_vec_4 color = glContext.GetMaterialManager().GetCurColor() * 128.0f; + cpu_vec_4 color = glContext.GetMaterialManager().GetCurMatColor() * 128.0f; packet += (unsigned int)color[0]; packet += (unsigned int)color[1]; packet += (unsigned int)color[2]; diff --git a/include/ps2gl/base_renderer.h b/include/ps2gl/base_renderer.h index a6e4859..3abc52f 100644 --- a/include/ps2gl/base_renderer.h +++ b/include/ps2gl/base_renderer.h @@ -26,7 +26,8 @@ class CBaseRenderer : public CRenderer { // cached in DrawArrays from geometry manager for XferBlock float CurTexCoord[2]; cpu_vec_xyz CurNormal; - CDmaPacket *TexCoordBuf, *NormalBuf; + cpu_vec_xyzw CurGeomColor; + CDmaPacket *TexCoordBuf, *NormalBuf, *ColorBuf; int WordsPerVertex, WordsPerNormal, WordsPerTexCoord, WordsPerColor; unsigned int VertexUnpackMode, NormalUnpackMode; diff --git a/include/ps2gl/dlgmanager.h b/include/ps2gl/dlgmanager.h index 4136c77..466878f 100644 --- a/include/ps2gl/dlgmanager.h +++ b/include/ps2gl/dlgmanager.h @@ -54,8 +54,8 @@ class CDListGeomManager : public CGeomManager { void TexCoord(float u, float v); void Color(cpu_vec_xyzw color); void EndGeom(); - void DrawArrays(GLenum mode, int first, int count); - void DrawIndexedArrays(GLenum primType, + void LinearArraysGeomStage(GLenum mode, int first, int count); + void IndexedArraysGeomStage(GLenum primType, int numIndices, const unsigned char* indices, int numVertices); void Flush(); diff --git a/include/ps2gl/fixed_function.h b/include/ps2gl/fixed_function.h new file mode 100644 index 0000000..f046995 --- /dev/null +++ b/include/ps2gl/fixed_function.h @@ -0,0 +1,224 @@ +#ifndef ps2gl_fixed_function_h +#define ps2gl_fixed_function_h + +//TODO: this is only half composed; I need to really justify the existence of this header if going this course... +// It may purely be something that would "help" me, and thus I am uncertain whether it's truly justified to rewrite all the logic. 
+// This will be the biggest merge conflict once i perhaps integrate it. +#pragma once +#include +#include +#ifndef GL_DIFFUSE + #define GL_DIFFUSE 0x1201 +#endif +// typedef unsigned int GLenum; //TODO??? +#include +#include "ps2gl/gmanager.h" +#include "ps2gl/gblock.h" +#include "ps2gl/renderermanager.h" +#include "ps2gl/glcontext.h" + +typedef enum { + FIXED_FUNCTION_ATTR_NONE = 0, + FIXED_FUNCTION_ATTR_CONSTANT, + FIXED_FUNCTION_ATTR_ARRAY +} FixedFunctionDataSrc; + +typedef enum { + FIXED_FUNCTION_COLOR_CONSTANT = 0, + FIXED_FUNCTION_COLOR_ARRAY, + FIXED_FUNCTION_COLOR_LIT +} FixedFunctionColor; //TODO I get that this is for emphasis but it seems ugly idk + +typedef struct { + bool texture2dEnabled; + bool lightingEnabled; + bool colorMaterialEnabled; + GLenum colorMaterialMode; + + bool vertexArrayEnabled; + bool normalArrayEnabled; + bool texcoordArrayEnabled; + bool colorArrayEnabled; + + bool diffuseTextureBound; + + float currentColor[4]; + float currentNormal[3]; + float currentTexCoord[2]; + + float tintRgba[4]; + + bool immediateColorVariesInPrimitive; +} FixedFunctionConditions; //TODO: should we merge this with state somehow? + +typedef struct { + FixedFunctionDataSrc vertexSrc; + FixedFunctionDataSrc normalSrc; + FixedFunctionDataSrc texcoordSrc; + FixedFunctionDataSrc colorSrc; + + FixedFunctionColor ffColor; + + bool V, N, T, C; + + bool textureFlag; + bool lightingFlag; + bool colorMaterialAffectsDiffuse; +} FixedFunctionState; + +typedef enum { + QW_NONE = 0x0, // ---- + QW_X = 0x1, // X--- + QW_XY = 0x3, // XY-- + QW_XYZ = 0x7, // XYZ- + QW_XYZW = 0xF // XYZW +} QuadWords; + +typedef struct { + QuadWords vertices; // legal: QW_XYZ or QW_XYZW // TODO: remove (qw == QW_XYZ) I THINK!! 
+ QuadWords normals; // legal: QW_NONE or QW_XYZ + QuadWords texcoords; // legal: QW_NONE or QW_XY + QuadWords colors; // legal: QW_NONE or QW_XYZW +} LaneConfig; + +static inline int verticesOk(QuadWords qw) { return (qw == QW_XYZ) || (qw == QW_XYZW); } // TODO: remove (qw == QW_XYZ) +static inline int normalsOk(QuadWords qw) { return (qw == QW_NONE) || (qw == QW_XYZ); } +static inline int texcoordsOk(QuadWords qw) { return (qw == QW_NONE) || (qw == QW_XY); } +static inline int colorsOk(QuadWords qw) { return (qw == QW_NONE) || (qw == QW_XYZW); } + +static inline int ValidateLaneConfig(const LaneConfig* lanes, const char* where) { + if (!verticesOk(lanes->vertices) || !normalsOk(lanes->normals) || !texcoordsOk(lanes->texcoords) || !colorsOk(lanes->colors)) { + mError("%s: illegal lane masks (V=%x N=%x T=%x C=%x)", where, lanes->vertices, lanes->normals, lanes->texcoords, lanes->colors); + return 0; + } + return 1; +} + +static inline int QWToWords(QuadWords qw) { + switch (qw) { + case QW_NONE: return 0; + case QW_X: return 1; //TODO: just for brevity + case QW_XY: return 2; + case QW_XYZ: return 3; + case QW_XYZW: return 4; + default: return 0; + } +} + +static inline int LanePresent(QuadWords qw) { return (qw != QW_NONE); } + +static inline FixedFunctionState evaluate(const FixedFunctionConditions* conditions) +{ + FixedFunctionState state; + memset(&state, 0, sizeof(state)); + + state.vertexSrc = FIXED_FUNCTION_ATTR_ARRAY; + state.V = true; + + state.textureFlag = (conditions->texture2dEnabled && conditions->diffuseTextureBound && conditions->texcoordArrayEnabled); + state.texcoordSrc = state.textureFlag ? FIXED_FUNCTION_ATTR_ARRAY : FIXED_FUNCTION_ATTR_NONE; + state.T = (state.texcoordSrc == FIXED_FUNCTION_ATTR_ARRAY); + + const bool lightingFeasible = (conditions->lightingEnabled && conditions->normalArrayEnabled); + state.lightingFlag = lightingFeasible; + state.normalSrc = state.lightingFlag ? 
FIXED_FUNCTION_ATTR_ARRAY : FIXED_FUNCTION_ATTR_NONE; + state.N = (state.normalSrc == FIXED_FUNCTION_ATTR_ARRAY); + + const bool perVertexColorSupplyPresent = conditions->colorArrayEnabled || conditions->immediateColorVariesInPrimitive; + + if (!state.lightingFlag) { + if (perVertexColorSupplyPresent) { + state.colorSrc = FIXED_FUNCTION_ATTR_ARRAY; + state.ffColor = FIXED_FUNCTION_COLOR_ARRAY; + state.C = true; + } else { + state.colorSrc = FIXED_FUNCTION_ATTR_CONSTANT; + state.ffColor = FIXED_FUNCTION_COLOR_CONSTANT; + state.C = false; + } + } else { + state.colorSrc = perVertexColorSupplyPresent ? FIXED_FUNCTION_ATTR_ARRAY : FIXED_FUNCTION_ATTR_CONSTANT; + state.ffColor = FIXED_FUNCTION_COLOR_LIT; + state.C = false; + } + + state.colorMaterialAffectsDiffuse = + (state.lightingFlag && + conditions->colorMaterialEnabled && + conditions->colorMaterialMode == GL_DIFFUSE && + perVertexColorSupplyPresent); + + return state; +} + +static inline void capture( + FixedFunctionConditions* conditions, + CGLContext& glContext, + const CVertArray& vertArray, + bool diffuseTextureIsBound, + const float tintRgba[4], + bool immediateColorVariesInPrimitive) +{ + memset(conditions, 0, sizeof(*conditions)); + + conditions->texture2dEnabled = glContext.IsTextureEnabled(); + conditions->lightingEnabled = glContext.IsLightingEnabled(); + conditions->colorMaterialEnabled = glContext.IsColorMaterialEnabled(); + conditions->colorMaterialMode = glContext.GetColorMaterialMode(); + + conditions->vertexArrayEnabled = vertArray.GetVerticesAreValid(); + conditions->normalArrayEnabled = vertArray.GetNormalsAreValid(); + conditions->texcoordArrayEnabled = vertArray.GetTexCoordsAreValid(); + conditions->colorArrayEnabled = vertArray.GetColorsAreValid() && vertArray.GetWordsPerColor() == 4; + + conditions->diffuseTextureBound = diffuseTextureIsBound; + + //cpu_vec_xyzw current = glContext.GetGeomManager().GetCurGeomColor(); + cpu_vec_xyzw currentColor = glContext.GetCurrentGeomColor(); + 
conditions->currentColor[0] = currentColor[0]; + conditions->currentColor[1] = currentColor[1]; + conditions->currentColor[2] = currentColor[2]; + conditions->currentColor[3] = currentColor[3]; + + //TODO: why hard-code the normal instead of reading the actual current state? (Now read from glContext below, and likewise for the texcoord.) + // conditions->currentNormal[0] = 0.0f; + // conditions->currentNormal[1] = 0.0f; + // conditions->currentNormal[2] = 1.0f; + cpu_vec_xyz currentNormal = glContext.GetCurrentNormal(); + conditions->currentNormal[0] = currentNormal[0]; + conditions->currentNormal[1] = currentNormal[1]; + conditions->currentNormal[2] = currentNormal[2]; + + const float* currentTexCoord = glContext.GetCurrentTexCoord(); + conditions->currentTexCoord[0] = currentTexCoord[0]; + conditions->currentTexCoord[1] = currentTexCoord[1]; + + memcpy(conditions->tintRgba, tintRgba, sizeof(float)*4); + conditions->immediateColorVariesInPrimitive = immediateColorVariesInPrimitive; +} + +static inline void apply( + const FixedFunctionState& state, + CGeometryBlock& geometry, + CRendererManager& rendererManager, + CGLContext& glContext) //TODO: glContext is currently unused in this function; decide whether it is still needed +{ + //TODO: can we integrate the QuadWords enum and the lane validation here? It would be clearer to pass those enums than the ambiguous bare ints used below. + geometry.SetWordsPerVertex(4); + geometry.SetWordsPerNormal(state.N ? 3 : 0); + geometry.SetWordsPerTexCoord(state.T ? 2 : 0); + geometry.SetWordsPerColor(state.C ? 
4 : 0); + + geometry.SetVerticesAreValid(true); + geometry.SetNormalsAreValid(state.N); + geometry.SetTexCoordsAreValid(state.T); + geometry.SetColorsAreValid(state.C); + if (state.colorMaterialAffectsDiffuse) { + rendererManager.PerVtxMaterialChanged(RendererProps::kDiffuse); + } else { + rendererManager.PerVtxMaterialChanged(RendererProps::kNoMaterial); + } + +} + +#endif // ps2gl_fixed_function_h diff --git a/include/ps2gl/gblock.h b/include/ps2gl/gblock.h index b121bfe..2567b36 100644 --- a/include/ps2gl/gblock.h +++ b/include/ps2gl/gblock.h @@ -72,12 +72,10 @@ class CGeometryBlock { CGeometryBlock() { Reset(); } // get/set info about geometry - inline void SetVerticesAreValid(bool valid) { AreNewVerticesValid = valid; } inline void SetNormalsAreValid(bool valid) { AreNewNormalsValid = valid; } inline void SetTexCoordsAreValid(bool valid) { AreNewTexCoordsValid = valid; } inline void SetColorsAreValid(bool valid) { AreNewColorsValid = valid; } - inline bool GetVerticesAreValid() const { return AreVerticesValid; } inline bool GetNormalsAreValid() const { return AreNormalsValid; } inline bool GetTexCoordsAreValid() const { return AreTexCoordsValid; } diff --git a/include/ps2gl/glcontext.h b/include/ps2gl/glcontext.h index d9ed0f6..dae779e 100644 --- a/include/ps2gl/glcontext.h +++ b/include/ps2gl/glcontext.h @@ -13,6 +13,7 @@ #include "ps2s/gsmem.h" #include "ps2s/packet.h" +#include "ps2s/cpu_vector.h" #include "GL/gl.h" @@ -408,6 +409,14 @@ class CGLContext { void WaitForVSync(); void SwapBuffers(); + + bool IsTextureEnabled(); + bool IsLightingEnabled(); + bool IsColorMaterialEnabled(); + GLenum GetColorMaterialMode(); + cpu_vec_xyzw GetCurrentGeomColor(); + cpu_vec_xyz GetCurrentNormal(); + const float* GetCurrentTexCoord(); }; // global pointer to the GLContext diff --git a/include/ps2gl/gmanager.h b/include/ps2gl/gmanager.h index bc110fb..92ec036 100644 --- a/include/ps2gl/gmanager.h +++ b/include/ps2gl/gmanager.h @@ -19,7 +19,7 @@ 
/******************************************** * constants */ - +enum ColorSrc : uint8_t { kColor_Float = 0, kColor_UByte = 1 }; /******************************************** * CVertArray */ @@ -28,6 +28,7 @@ class CVertArray { void *Vertices, *Normals, *TexCoords, *Colors; bool VerticesAreValid, NormalsAreValid, TexCoordsAreValid, ColorsAreValid; char WordsPerVertex, WordsPerNormal, WordsPerTexCoord, WordsPerColor; + ColorSrc ColorSrcType; public: CVertArray(); @@ -51,6 +52,8 @@ class CVertArray { inline void SetNormals(void* newPtr) { Normals = newPtr; } inline void SetTexCoords(void* newPtr) { TexCoords = newPtr; } inline void SetColors(void* newPtr) { Colors = newPtr; } + inline ColorSrc GetColorSrcType() const { return ColorSrcType; } + inline void SetColorSrc(ColorSrc src) { ColorSrcType = src; } inline int GetWordsPerVertex() const { return WordsPerVertex; } inline int GetWordsPerNormal() const { return WordsPerNormal; } @@ -104,6 +107,7 @@ class CGeomManager { static tUserPrimEntry UserPrimTypes[kMaxUserPrimTypes]; // GL state + cpu_vec_xyzw CurGeomColor; cpu_vec_xyz CurNormal; float CurTexCoord[2]; static bool DoNormalize; @@ -167,8 +171,9 @@ class CGeomManager { void SetUserRenderContextChanged() { UserRenderContextChanged = true; } // GL state - + inline cpu_vec_xyzw GetCurGeomColor() const { return CurGeomColor; } inline cpu_vec_xyz GetCurNormal() const { return CurNormal; } + inline void SetCurGeomColor(cpu_vec_xyzw color) { CurGeomColor = color; } inline void SetCurNormal(cpu_vec_xyz normal) { CurNormal = normal; } inline const float* GetCurTexCoord() const { return CurTexCoord; } @@ -196,8 +201,8 @@ class CGeomManager { virtual void TexCoord(float u, float v) = 0; virtual void Color(cpu_vec_xyzw color) = 0; virtual void EndGeom() = 0; - virtual void DrawArrays(GLenum mode, int first, int count) = 0; - virtual void DrawIndexedArrays(GLenum primType, + virtual void LinearArraysGeomStage(GLenum mode, int first, int count) = 0; + virtual void 
IndexedArraysGeomStage(GLenum primType, int numIndices, const unsigned char* indices, int numVertices) = 0; @@ -205,3 +210,5 @@ class CGeomManager { }; #endif // ps2gl_gmanager_h + +// #include "ps2gl/fixed_function.h" \ No newline at end of file diff --git a/include/ps2gl/immgmanager.h b/include/ps2gl/immgmanager.h index cb37e4c..9b61de7 100644 --- a/include/ps2gl/immgmanager.h +++ b/include/ps2gl/immgmanager.h @@ -8,6 +8,7 @@ #define ps2gl_immgmanager_h #include "ps2gl/gmanager.h" +#include "ps2gl/fixed_function.h" /******************************************** * CImmGeomManager - the immediate renderer @@ -27,6 +28,7 @@ class CImmGeomManager : public CGeomManager { CGeometryBlock Geometry; void CommitNewGeom(); + bool ColorVariesInPrim = false; public: CImmGeomManager(CGLContext& context, int immBufferQwordSize); @@ -61,7 +63,7 @@ class CImmGeomManager : public CGeomManager { // normal, tex coord or vertex color is supplied for each vertex inline CDmaPacket& GetNormalBuf() { return *CurNormalBuf; } inline CDmaPacket& GetTexCoordBuf() { return *CurTexCoordBuf; } - + inline CDmaPacket& GetColorBuf() { return *CurColorBuf; } // user state void EnableCustom(uint64_t flag) { RendererManager.EnableCustom(flag); } @@ -75,8 +77,8 @@ class CImmGeomManager : public CGeomManager { void TexCoord(float u, float v); void Color(cpu_vec_xyzw color); void EndGeom(); - void DrawArrays(GLenum mode, int first, int count); - void DrawIndexedArrays(GLenum primType, + void LinearArraysGeomStage(GLenum mode, int first, int count); + void IndexedArraysGeomStage(GLenum primType, int numIndices, const unsigned char* indices, int numVertices); void Flush(); diff --git a/include/ps2gl/material.h b/include/ps2gl/material.h index efefa76..2001267 100644 --- a/include/ps2gl/material.h +++ b/include/ps2gl/material.h @@ -104,7 +104,7 @@ class CMaterialManager { CDListMaterial DListMaterial; CMaterial* CurMaterial; - cpu_vec_xyzw CurColor; + cpu_vec_xyzw CurMatColor; GLenum ColorMaterialMode; bool 
UseColorMaterial; bool InDListDef; @@ -115,7 +115,7 @@ class CMaterialManager { , ImmMaterial(context) , DListMaterial(context) , CurMaterial(&ImmMaterial) - , CurColor(1, 1, 1, 1) + , CurMatColor(1, 1, 1, 1) , ColorMaterialMode(GL_AMBIENT_AND_DIFFUSE) , UseColorMaterial(false) , InDListDef(false) @@ -126,7 +126,7 @@ class CMaterialManager { CMaterial& GetCurMaterial() { return *CurMaterial; } CImmMaterial& GetImmMaterial() { return ImmMaterial; } CDListMaterial& GetDListMaterial() { return DListMaterial; } - cpu_vec_xyzw GetCurColor() const { return CurColor; } + cpu_vec_xyzw GetCurMatColor() const { return CurMatColor; } GLenum GetColorMaterialMode() const { return ColorMaterialMode; } bool GetColorMaterialEnabled() const { return UseColorMaterial; } diff --git a/include/ps2gl/renderer.h b/include/ps2gl/renderer.h index 37557b6..f4970c4 100644 --- a/include/ps2gl/renderer.h +++ b/include/ps2gl/renderer.h @@ -68,7 +68,7 @@ typedef enum { k1PtLight = 1 << 0, k8PtLights = 1 << 2 } tNumPtLights; typedef enum { kNoMaterial = 1 << 0, - kDiffuse = 1 << 1, + kDiffuse = 1 << 1, //TODO: it seems important to consolidate where "colors" and "light" differ still kSpecular = 1 << 2 } tPerVtxMaterial; typedef enum { kLinear = 1 << 0, diff --git a/src/base_renderer.cpp b/src/base_renderer.cpp index 8288729..a809bd4 100644 --- a/src/base_renderer.cpp +++ b/src/base_renderer.cpp @@ -9,6 +9,7 @@ #include "ps2s/packet.h" #include +#include #include "ps2gl/base_renderer.h" #include "ps2gl/drawcontext.h" @@ -65,25 +66,45 @@ void CBaseRenderer::InitXferBlock(CVifSCDmaPacket& packet, NormalBuf = &gmanager.GetNormalBuf(); TexCoordBuf = &gmanager.GetTexCoordBuf(); + ColorBuf = &gmanager.GetColorBuf(); CurNormal = gmanager.GetCurNormal(); const float* texCoord = gmanager.GetCurTexCoord(); CurTexCoord[0] = texCoord[0]; CurTexCoord[1] = texCoord[1]; + CurGeomColor = gmanager.GetCurGeomColor(); // get unpack modes/masks - WordsPerVertex = wordsPerVertex; + LaneConfig lanes; + lanes.vertices = 
(wordsPerVertex == 4) ? QW_XYZW : + (wordsPerVertex == 3) ? QW_XYZ : QW_NONE; + lanes.normals = (wordsPerNormal == 0) ? QW_NONE : + (wordsPerNormal == 3) ? QW_XYZ : QW_NONE; + lanes.texcoords = (wordsPerTex == 0) ? QW_NONE : + (wordsPerTex == 2) ? QW_XY : QW_NONE; + lanes.colors = (wordsPerColor == 0) ? QW_NONE : + (wordsPerColor == 4) ? QW_XYZW : QW_NONE; + + ValidateLaneConfig(&lanes, "InitXferBlock"); + WordsPerVertex = QWToWords(lanes.vertices); GetUnpackAttribs(WordsPerVertex, VertexUnpackMode, VertexUnpackMask); - WordsPerNormal = (wordsPerNormal > 0) ? wordsPerNormal : 3; - GetUnpackAttribs(WordsPerNormal, NormalUnpackMode, NormalUnpackMask); + // WordsPerNormal = (wordsPerNormal > 0) ? wordsPerNormal : 3; + WordsPerNormal = QWToWords(lanes.normals); + if (WordsPerNormal > 0) GetUnpackAttribs(WordsPerNormal, NormalUnpackMode, NormalUnpackMask); - WordsPerTexCoord = (wordsPerTex > 0) ? wordsPerTex : 2; - GetUnpackAttribs(WordsPerTexCoord, TexCoordUnpackMode, TexCoordUnpackMask); + // WordsPerTexCoord = (wordsPerTex > 0) ? wordsPerTex : 2; + WordsPerTexCoord = QWToWords(lanes.texcoords); + if (WordsPerTexCoord > 0) GetUnpackAttribs(WordsPerTexCoord, TexCoordUnpackMode, TexCoordUnpackMask); - WordsPerColor = (wordsPerColor > 0) ? 
wordsPerColor : 3; - GetUnpackAttribs(WordsPerColor, ColorUnpackMode, ColorUnpackMask); + WordsPerColor = QWToWords(lanes.colors); + if (WordsPerColor > 0) GetUnpackAttribs(WordsPerColor, ColorUnpackMode, ColorUnpackMask); + + XferVertices = LanePresent(lanes.vertices); + XferNormals = LanePresent(lanes.normals); + XferTexCoords = LanePresent(lanes.texcoords); + XferColors = LanePresent(lanes.colors); // set up the row register to expand vectors with fewer than 4 elements @@ -116,6 +137,12 @@ void CBaseRenderer::XferBlock(CVifSCDmaPacket& packet, const void* texCoords, const void* colors, int vu1Offset, int firstElement, int numToAdd) { + //TODO: lane mapping V|T|C, V|C|T, V|N|T|C is difficult to figure out with the vu code sometimes + // should be super super clear somewhere probably better than here + const int laneV = 0; + const int laneN = 1; + const int laneT = 2; + const int laneC = 3; // // vertices // @@ -125,7 +152,7 @@ void CBaseRenderer::XferBlock(CVifSCDmaPacket& packet, XferVectors(packet, (unsigned int*)vertices, firstElement, numToAdd, WordsPerVertex, VertexUnpackMask, VertexUnpackMode, - vu1Offset); + vu1Offset + laneV); } // @@ -146,11 +173,12 @@ void CBaseRenderer::XferBlock(CVifSCDmaPacket& packet, normalBuf += CurNormal; } - if (XferNormals) + if (XferNormals) { XferVectors(packet, (unsigned int*)normals, firstNormal, numToAdd, WordsPerNormal, NormalUnpackMask, NormalUnpackMode, - vu1Offset + 1); + vu1Offset + laneN); + } // // tex coords @@ -169,11 +197,12 @@ void CBaseRenderer::XferBlock(CVifSCDmaPacket& packet, texCoordBuf += CurTexCoord[1]; } } - if (XferTexCoords) + if (XferTexCoords) { XferVectors(packet, (unsigned int*)texCoords, firstTexCoord, numToAdd, WordsPerTexCoord, TexCoordUnpackMask, TexCoordUnpackMode, - vu1Offset + 2); + vu1Offset + laneT); + } // // colors @@ -184,7 +213,7 @@ void CBaseRenderer::XferBlock(CVifSCDmaPacket& packet, XferVectors(packet, (unsigned int*)colors, firstColor, numToAdd, WordsPerColor, ColorUnpackMask, 
ColorUnpackMode, - vu1Offset + 3); + vu1Offset + laneC); } } @@ -252,7 +281,11 @@ void CBaseRenderer::AddVu1RendererContext(CVifSCDmaPacket& packet, GLenum primTy packet += numPts; packet += numSpots; } else { + /* packet += (uint64_t)0; + */ + packet += 0; + packet += 0; packet += 0; } @@ -314,10 +347,18 @@ void CBaseRenderer::AddVu1RendererContext(CVifSCDmaPacket& packet, GLenum primTy // add emissive component cpu_vec_4 emission; + if (doLighting) { + emission = material.GetEmission() * maxColorValue; + } else { + emission = glContext.GetGeomManager().GetCurGeomColor() * maxColorValue; + // emission = glContext.GetMaterialManager().GetCurMatColor() * maxColorValue; + } + /* if (doLighting) emission = material.GetEmission() * maxColorValue; else emission = glContext.GetMaterialManager().GetCurColor() * maxColorValue; + */ packet += emission; // ambient @@ -328,7 +369,8 @@ void CBaseRenderer::AddVu1RendererContext(CVifSCDmaPacket& packet, GLenum primTy // the alpha value is set to the alpha of the diffuse in the renderers; // this should be the current color alpha if lighting is disabled if (!doLighting) - matDiffuse[3] = glContext.GetMaterialManager().GetCurColor()[3]; + matDiffuse[3] = glContext.GetGeomManager().GetCurGeomColor()[3]; + // matDiffuse[3] = glContext.GetMaterialManager().GetCurMatColor()[3]; packet += matDiffuse; // specular @@ -354,6 +396,8 @@ void CBaseRenderer::AddVu1RendererContext(CVifSCDmaPacket& packet, GLenum primTy GLenum newPrimType = drawContext.GetPolygonMode(); if (newPrimType == GL_FILL) newPrimType = primType; + if (newPrimType == GL_LINE) + newPrimType = GL_LINES; newPrimType &= 0xff; tGifTag giftag = BuildGiftag(newPrimType); packet += giftag; @@ -378,24 +422,32 @@ tGifTag CBaseRenderer::BuildGiftag(GLenum primType) { CGLContext& glContext = *pGLContext; - - primType &= 0x7; // convert from GL #define to gs prim number + //TODO: JESUS CHRIST + if (primType == GL_LINES) { + primType = 1; + } else { + primType &= 0x7; // convert from 
GL #define to gs prim number + } CImmDrawContext& drawContext = glContext.GetImmDrawContext(); bool smoothShading = drawContext.GetDoSmoothShading(); bool useTexture = glContext.GetTexManager().GetTexEnabled(); bool alpha = drawContext.GetBlendEnabled(); unsigned int nreg = OutputQuadsPerVert; + // bool flip = drawContext.CurFrameMem != drawContext.Frame0Mem; + // GS::tPrim prim = { .prim_type = primType, .iip = smoothShading, .tme = useTexture, .fge = 0, .abe = alpha, .aa1 = 0, .fst = 0, .ctxt = flip, .fix = 0 }; + mDebugPrint("primType =%d)\n", (int)primType); - GS::tPrim prim = { prim_type : primType, iip : smoothShading, tme : useTexture, fge : 0, abe : alpha, aa1 : 0, fst : 0, ctxt : 0, fix : 0 }; - tGifTag giftag = { NLOOP : 0, EOP : 1, pad0 : 0, id : 0, PRE : 1, PRIM : *(uint64_t*)&prim, FLG : 0, NREG : nreg, REGS0 : 2, REGS1 : 1, REGS2 : 4 }; + GS::tPrim prim = { .prim_type = primType, .iip = smoothShading, .tme = useTexture, .fge = 0, .abe = alpha, .aa1 = 0, .fst = 0, .ctxt = 0, .fix = 0 }; + tGifTag giftag = { .NLOOP = 0, .EOP = 1, .pad0 = 0, .id = 0, .PRE = 1, .PRIM = *(uint64_t*)&prim, .FLG = 0, .NREG = nreg, .REGS0 = 2, .REGS1 = 1, .REGS2 = 4 }; return giftag; } void CBaseRenderer::CacheRendererState() { - XferNormals = pGLContext->GetImmLighting().GetLightingEnabled(); - XferTexCoords = pGLContext->GetTexManager().GetTexEnabled(); - XferColors = pGLContext->GetMaterialManager().GetColorMaterialEnabled(); + //TODO: these are too confusing??? look at CommitNewGeom, and SyncRenderer and all that stuff + //XferNormals = pGLContext->GetImmLighting().GetLightingEnabled(); + //XferTexCoords = pGLContext->GetTexManager().GetTexEnabled(); + //TODO: cannot decide Xfercolor state yet because we dont know if its per vertex or constant yet...???? 
} void CBaseRenderer::Load() diff --git a/src/dlgmanager.cpp b/src/dlgmanager.cpp index 1b5c3d2..6fa96d5 100644 --- a/src/dlgmanager.cpp +++ b/src/dlgmanager.cpp @@ -247,7 +247,7 @@ void CDListGeomManager::EndGeom() * DrawArrays */ -void CDListGeomManager::DrawArrays(GLenum mode, int first, int count) +void CDListGeomManager::LinearArraysGeomStage(GLenum mode, int first, int count) { if (Prim != mode) PrimChanged(mode); @@ -280,7 +280,7 @@ void CDListGeomManager::DrawArrays(GLenum mode, int first, int count) CommitNewGeom(); } -void CDListGeomManager::DrawIndexedArrays(GLenum primType, +void CDListGeomManager::IndexedArraysGeomStage(GLenum primType, int numIndices, const unsigned char* indices, int numVertices) { diff --git a/src/glcontext.cpp b/src/glcontext.cpp index b513dd0..43fd1de 100644 --- a/src/glcontext.cpp +++ b/src/glcontext.cpp @@ -789,3 +789,33 @@ const GLubyte* glGetString(GLenum name) mNotImplemented(); return (GLubyte*)"not implemented"; } + +bool CGLContext::IsTextureEnabled() { + return GetTexManager().GetTexEnabled(); +} + +bool CGLContext::IsLightingEnabled() { + return GetImmLighting().GetLightingEnabled(); +} + +bool CGLContext::IsColorMaterialEnabled() { + return GetMaterialManager().GetColorMaterialEnabled(); +} + +GLenum CGLContext::GetColorMaterialMode() { + return GetMaterialManager().GetColorMaterialMode(); +} + +cpu_vec_xyzw CGLContext::GetCurrentGeomColor() { + return GetGeomManager().GetCurGeomColor(); +} + +cpu_vec_xyz CGLContext::GetCurrentNormal() { + return GetGeomManager().GetCurNormal(); +} + +const float* CGLContext::GetCurrentTexCoord() { + return GetGeomManager().GetCurTexCoord(); +} + + diff --git a/src/gmanager.cpp b/src/gmanager.cpp index 2636b1c..86e288f 100644 --- a/src/gmanager.cpp +++ b/src/gmanager.cpp @@ -30,6 +30,7 @@ CVertArray::CVertArray() VerticesAreValid = NormalsAreValid = TexCoordsAreValid = ColorsAreValid = false; WordsPerVertex = WordsPerTexCoord = WordsPerColor = 0; WordsPerNormal = 3; // not set by 
NormalPointer + ColorSrcType = kColor_Float; } /******************************************** @@ -46,6 +47,21 @@ bool CGeomManager::DoNormalize = false; CGeomManager::CGeomManager(CGLContext& context) : GLContext(context) + , CurGeomColor(1.0f, 1.0f, 1.0f, 1.0f) //TODO: I think this is the GL color behavior? for missing colors case... + //NOTE: this default CurNormal allows for "appearing" unlit PVC effect when: + // - ColorMaterial Enabled + // - Lighting Enabled + // - Light0 Enabled + // - Normals are never set during the glBegin/glEnd -> Default CurNormal of {0.0, 0.0, 1.0} (set in this constructor) is set + // VU Renderer: "linear, pvc, tris" is then targetted + // IMPORTANT: EVERY FRAME Light0's direction is set via + // constexpr float direction_towards_per_vertex_normal[4] = {0.0, 0.0, 1.0, 0.0}; + // glLightfv(GL_LIGHT0, GL_POSITION, direction_towards_per_vertex_normal); + // Why the dot product in the VU1 renderer GeneralPVDiff cancels out the Diffuse lighting effect: + // ps2gl converts lights into object/model space for VU1: + // AddVu1RendererContext: lighting calculations are done in object/model space via worldToObjXfrm). + // so LIGHT0's direction {0, 0, 1, 0} remains aligned with every vertices default CurNormal = {0,0,1}. + // Therefore N·L = 1 for the whole object/model , CurNormal(0.0f, 0.0f, 1.0f) , Prim(GL_INVALID_VALUE) , InsideBeginEnd(false) @@ -140,23 +156,39 @@ void glTexCoordPointer(GLint size, GLenum type, * @param stride must be zero. Non-zero strides are unsupported and likely * to remain so. 
*/ -void glColorPointer(GLint size, GLenum type, - GLsizei stride, const GLvoid* ptr) +void glColorPointer(GLint size, GLenum type, GLsizei stride, const GLvoid* ptr) { GL_FUNC_DEBUG("%s\n", __FUNCTION__); + mDebugPrint("glColorPointer: size=%d type=0x%X stride=%d ptr=%p\n", (int)size, (unsigned)type, (int)stride, ptr); if (stride != 0) { mNotImplemented("stride must be 0"); return; } if (type != GL_FLOAT) { + if (type == GL_UNSIGNED_BYTE) { + if (ptr) { + const unsigned char* colorSample = (const unsigned char*)ptr; + mDebugPrint("glColorPointer: SAMPLE u8=(%u,%u,%u,%u)\n", colorSample[0], colorSample[1], colorSample[2], colorSample[3]); + } + CVertArray& vertArray = pGLContext->GetGeomManager().GetVertArray(); + vertArray.SetColors((void*)ptr); + vertArray.SetWordsPerColor(4); + vertArray.SetColorSrc(kColor_UByte); + return; + } mNotImplemented("type must be float"); return; } + if (ptr) { + const float* colorSample = (const float*)ptr; + mDebugPrint("glColorPointer: SAMPLE f32=(%.3f,%.3f,%.3f,%.3f)\n", colorSample[0], colorSample[1], colorSample[2], colorSample[3]); + } CVertArray& vertArray = pGLContext->GetGeomManager().GetVertArray(); vertArray.SetColors((void*)ptr); vertArray.SetWordsPerColor(size); + mDebugPrint("glColorPointer: BOUND F32 colors (wpc=%d)\n", (int)size); } /** @@ -174,18 +206,101 @@ void glDrawArrays(GLenum mode, GLint first, GLsizei count) { GL_FUNC_DEBUG("%s\n", __FUNCTION__); + if (pGLContext->GetImmDrawContext().GetPolygonMode() == GL_LINE && mode == GL_TRIANGLES) { + GLushort maxIndex = (GLushort)(first + count - 1); + if (maxIndex <= 255) { + GLsizei triangleCount = count / 3; + GLsizei lineIndexCount = triangleCount * 6; + static uint8_t* indices_u8_scratch = NULL; + static int scratchCapacity = 0; + if (scratchCapacity < lineIndexCount) { + delete[] indices_u8_scratch; + indices_u8_scratch = new uint8_t[lineIndexCount]; + scratchCapacity = (int)lineIndexCount; + } + uint8_t* p = indices_u8_scratch; + for (GLsizei i = 0; i + 2 < 
count; i += 3) { + uint8_t a = (uint8_t)(first + i + 0); + uint8_t b = (uint8_t)(first + i + 1); + uint8_t c = (uint8_t)(first + i + 2); + *p++ = a; *p++ = b; + *p++ = b; *p++ = c; + *p++ = c; *p++ = a; + } + CGeomManager& gmanager = pGLContext->GetGeomManager(); + gmanager.IndexedArraysGeomStage(GL_TRIANGLES, (int)lineIndexCount, indices_u8_scratch, (int)(maxIndex + 1)); + return; + } + mode = GL_LINES; + } CGeomManager& gmanager = pGLContext->GetGeomManager(); - gmanager.DrawArrays(mode, first, count); + gmanager.LinearArraysGeomStage(mode, first, count); } /** - * This is not implemented yet + * This is now being implemented/experimental */ void glDrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid* indices) { GL_FUNC_DEBUG("%s\n", __FUNCTION__); - mError("glDrawElements is a placeholder ATM and should not be called"); + if (type != GL_UNSIGNED_SHORT) { + mNotImplemented("glDrawElements only supports GL_UNSIGNED_SHORT for now"); + return; + } + + const GLushort* indices_u16 = (const GLushort*)indices; + GLushort max = 0; + + for (GLsizei i = 0; i < count; ++i) { + if (indices_u16[i] > max) max = indices_u16[i]; + } + + const int numVertices = (int)max + 1; + + static GLushort* indices_u16_scratch = NULL; + static int scratch16Capacity = 0; + if (pGLContext->GetImmDrawContext().GetPolygonMode() == GL_LINE && mode == GL_TRIANGLES) + { + int triangleCount = count / 3; + int lineCount = triangleCount * 6; + if (scratch16Capacity < lineCount) { + delete[] indices_u16_scratch; + indices_u16_scratch = new GLushort[lineCount]; + scratch16Capacity = lineCount; + } + GLushort* linesIndexBuffer = indices_u16_scratch; + if (mode == GL_TRIANGLES) { + for (GLsizei i = 0; i + 2 < count; i += 3) { + GLushort a = indices_u16[i+0]; + GLushort b = indices_u16[i+1]; + GLushort c = indices_u16[i+2]; + *linesIndexBuffer++ = a; *linesIndexBuffer++ = b; + *linesIndexBuffer++ = b; *linesIndexBuffer++ = c; + *linesIndexBuffer++ = c; *linesIndexBuffer++ = a; + } + } + 
indices_u16 = indices_u16_scratch; + count = (GLsizei)(linesIndexBuffer - indices_u16_scratch); + //mode = GL_LINES; //TODO: add a renderer for lines? nah? + } + if (max <= 255) { + static uint8_t* indices_u8_scratch = NULL; + static int scratchCapacity = 0; + if (scratchCapacity < count) { + delete[] indices_u8_scratch; + indices_u8_scratch = new uint8_t[count]; + scratchCapacity = (int)count; + } + for (GLsizei i = 0; i < count; ++i) { + indices_u8_scratch[i] = (uint8_t)indices_u16[i]; + } + CGeomManager& gmanager = pGLContext->GetGeomManager(); + gmanager.IndexedArraysGeomStage(mode, (int)count, indices_u8_scratch, numVertices); + } else { + CGeomManager& gmanager = pGLContext->GetGeomManager(); + gmanager.IndexedArraysGeomStage(mode, (int)count, (const unsigned char*)indices_u16, numVertices); + } } /** @@ -388,6 +503,17 @@ void glColor4f(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha) gmanager.Color(cpu_vec_xyzw(red, green, blue, alpha)); } +//raylib need this function +void glColor4ub(GLubyte red, GLubyte green, GLubyte blue, GLubyte alpha) +{ + GL_FUNC_DEBUG("%s\n", __FUNCTION__); + float r = (float)red/255.0; + float b = (float)blue/255.0; + float g = (float)green/255.0; + float a = (float)alpha/255.0; + glColor4f(r,g,b,a); +} + void glColor4fv(const GLfloat* color) { GL_FUNC_DEBUG("%s\n", __FUNCTION__); @@ -438,7 +564,7 @@ void pglDrawIndexedArrays(GLenum primType, int numIndices, const unsigned char* indices, int numVertices) { - pGLContext->GetGeomManager().DrawIndexedArrays(primType, numIndices, indices, numVertices); + pGLContext->GetGeomManager().IndexedArraysGeomStage(primType, numIndices, indices, numVertices); } /** diff --git a/src/immgmanager.cpp b/src/immgmanager.cpp index 378f070..3dfc054 100644 --- a/src/immgmanager.cpp +++ b/src/immgmanager.cpp @@ -5,6 +5,7 @@ main directory of this archive for more details. 
*/ #include +#include #include "ps2s/cpu_matrix.h" #include "ps2s/displayenv.h" @@ -99,6 +100,7 @@ void CImmGeomManager::BeginGeom(GLenum mode) Geometry.SetPrimType(mode); Geometry.SetArrayType(kLinear); + ColorVariesInPrim = false; Geometry.SetNormals(CurNormalBuf->GetNextPtr()); Geometry.SetVertices(CurVertexBuf->GetNextPtr()); @@ -117,6 +119,11 @@ void CImmGeomManager::Vertex(cpu_vec_xyzw newVert) *CurTexCoordBuf += texCoord[0]; *CurTexCoordBuf += texCoord[1]; + if (ColorVariesInPrim) { + const cpu_vec_xyzw color = GetCurGeomColor(); + *CurColorBuf += color; + Geometry.AddColors(); + } *CurVertexBuf += newVert; Geometry.AddVertices(); @@ -134,9 +141,18 @@ void CImmGeomManager::Normal(cpu_vec_xyz normal) void CImmGeomManager::Color(cpu_vec_xyzw color) { if (InsideBeginEnd) { - *CurColorBuf += color; - Geometry.AddColors(); + if (!ColorVariesInPrim) { + int backFillVertexCount = Geometry.GetNumNewVertices() - Geometry.GetNumNewColors(); + const cpu_vec_xyzw currentColor = GetCurGeomColor(); + for (int i = 0; i < backFillVertexCount; ++i) { + *CurColorBuf += currentColor; + Geometry.AddColors(); + } + ColorVariesInPrim = true; + } + SetCurGeomColor(color); } else { + SetCurGeomColor(color); GLContext.GetMaterialManager().Color(color); } } @@ -155,32 +171,37 @@ void CImmGeomManager::EndGeom() Geometry.SetNormalsAreValid(true); Geometry.SetTexCoordsAreValid(true); - // check colors - Geometry.SetColorsAreValid(false); - if (Geometry.GetNumNewColors() > 0) { - mErrorIf(Geometry.GetNumNewVertices() != Geometry.GetNumNewColors(), - "Sorry, but inside glBegin/glEnd you need " - "to specify either one color for each vertex given, or none."); - Geometry.SetColorsAreValid(true); + const bool useColorLane = + (!GLContext.GetImmLighting().GetLightingEnabled() && ColorVariesInPrim) || + ( GLContext.GetImmLighting().GetLightingEnabled() && + GLContext.GetMaterialManager().GetColorMaterialEnabled() && + ColorVariesInPrim); - SyncColorMaterial(true); - } else { - 
SyncColorMaterial(false); - } + LaneConfig lanes; + lanes.vertices = QW_XYZW; + lanes.normals = GLContext.GetImmLighting().GetLightingEnabled() ? QW_XYZ : QW_NONE; + lanes.texcoords = GLContext.GetTexManager().GetTexEnabled() ? QW_XY : QW_NONE; + lanes.colors = useColorLane ? QW_XYZW : QW_NONE; + + ValidateLaneConfig(&lanes, "EndGeom"); - Geometry.SetWordsPerVertex(4); - Geometry.SetWordsPerNormal(3); - Geometry.SetWordsPerTexCoord(2); - Geometry.SetWordsPerColor(4); + Geometry.SetColorsAreValid(LanePresent(lanes.colors)); + SyncColorMaterial(LanePresent(lanes.colors)); + RendererManager.PerVtxMaterialChanged(useColorLane ? RendererProps::kDiffuse : RendererProps::kNoMaterial); + + Geometry.SetWordsPerVertex(QWToWords(lanes.vertices)); + Geometry.SetWordsPerNormal(QWToWords(lanes.normals)); + Geometry.SetWordsPerTexCoord(QWToWords(lanes.texcoords)); + Geometry.SetWordsPerColor(QWToWords(lanes.colors)); CommitNewGeom(); } /******************************************** - * DrawArrays + * LinearArraysGeomStage */ -void CImmGeomManager::DrawArrays(GLenum mode, int first, int count) +void CImmGeomManager::LinearArraysGeomStage(GLenum mode, int first, int count) { if (Prim != mode) PrimChanged(mode); @@ -191,68 +212,71 @@ void CImmGeomManager::DrawArrays(GLenum mode, int first, int count) Geometry.SetVertices(VertArray->GetVertices()); Geometry.SetNormals(VertArray->GetNormals()); Geometry.SetTexCoords(VertArray->GetTexCoords()); - Geometry.SetColors(VertArray->GetColors()); + + void* colorsPtr = VertArray->GetColors(); + const bool colorArrayEnabled = VertArray->GetColorsAreValid() && VertArray->GetWordsPerColor() == 4; + + if (colorArrayEnabled && VertArray->GetColorSrcType() == kColor_UByte) { + float* bufStart = (float*)CurColorBuf->GetNextPtr(); + const unsigned char* srcColorBuf_U8 = (const unsigned char*)colorsPtr; + const int totalCount = first + count; + for (int i = 0; i < totalCount; ++i) { + const unsigned char* colorChannels = srcColorBuf_U8 + 4*i; + 
*CurColorBuf += colorChannels[0] / 255.0f; + *CurColorBuf += colorChannels[1] / 255.0f; + *CurColorBuf += colorChannels[2] / 255.0f; + *CurColorBuf += colorChannels[3] / 255.0f; + } + colorsPtr = bufStart; + } + + Geometry.SetColors(colorsPtr); Geometry.SetVerticesAreValid(VertArray->GetVerticesAreValid()); Geometry.SetNormalsAreValid(VertArray->GetNormalsAreValid()); Geometry.SetTexCoordsAreValid(VertArray->GetTexCoordsAreValid()); - Geometry.SetColorsAreValid(VertArray->GetColorsAreValid()); - Geometry.SetWordsPerVertex(VertArray->GetWordsPerVertex()); - Geometry.SetWordsPerNormal(VertArray->GetWordsPerNormal()); - Geometry.SetWordsPerTexCoord(VertArray->GetWordsPerTexCoord()); - Geometry.SetWordsPerColor(VertArray->GetWordsPerColor()); + const bool arrayHasColors = VertArray->GetColorsAreValid() && (VertArray->GetWordsPerColor() > 0); + + LaneConfig lanes; + lanes.vertices = (VertArray->GetWordsPerVertex() == 4) ? QW_XYZW : + (VertArray->GetWordsPerVertex() == 3) ? QW_XYZ : QW_NONE; + lanes.normals = (VertArray->GetNormalsAreValid() && + VertArray->GetWordsPerNormal() == 3 && + GLContext.GetImmLighting().GetLightingEnabled()) ? QW_XYZ : QW_NONE; + lanes.texcoords = (VertArray->GetTexCoordsAreValid() && + VertArray->GetWordsPerTexCoord() == 2) ? QW_XY : QW_NONE; + lanes.colors = + ((!GLContext.GetImmLighting().GetLightingEnabled() && arrayHasColors) || + ( GLContext.GetImmLighting().GetLightingEnabled() && + GLContext.GetMaterialManager().GetColorMaterialEnabled() && + arrayHasColors)) ? QW_XYZW : QW_NONE; + + ValidateLaneConfig(&lanes, "LinearArraysGeomStage"); + + Geometry.SetWordsPerVertex(QWToWords(lanes.vertices)); + Geometry.SetWordsPerNormal(QWToWords(lanes.normals)); + Geometry.SetWordsPerTexCoord(QWToWords(lanes.texcoords)); + Geometry.SetWordsPerColor(QWToWords(lanes.colors)); + + Geometry.SetColorsAreValid(LanePresent(lanes.colors)); + SyncColorMaterial(LanePresent(lanes.colors)); + RendererManager.PerVtxMaterialChanged(LanePresent(lanes.colors) ? 
RendererProps::kDiffuse : RendererProps::kNoMaterial); Geometry.AddVertices(count); Geometry.AddNormals(count); Geometry.AddTexCoords(count); - Geometry.AddColors(count); - + if (LanePresent(lanes.colors)) Geometry.AddColors(count); Geometry.AdjustNewGeomPtrs(first); - // do this before sync'ing the vu1 renderer in CommitNewGeom - SyncColorMaterial(VertArray->GetColors() != NULL); - CommitNewGeom(); } -void CImmGeomManager::DrawingIndexedArray() -{ - if (!LastArrayAccessIsValid || !LastArrayAccessWasIndexed) { - GLContext.ArrayAccessChanged(); - RendererManager.ArrayAccessChanged(RendererProps::kIndexed); - LastArrayAccessIsValid = true; - } - LastArrayAccessWasIndexed = true; -} - -void CImmGeomManager::DrawIndexedArrays(GLenum primType, +void CImmGeomManager::IndexedArraysGeomStage(GLenum primType, int numIndices, const unsigned char* indices, int numVertices) { - /* - // make sure there's no pending geometry - Flush(); - - // do these before sync'ing the vu1 renderer - SyncColorMaterial(VertArray->GetColors() != NULL); - DrawingIndexedArray(); - - // now update the renderer and render - - bool rendererChanged = RendererManager.UpdateRenderer(); - - if ( rendererChanged ) { - RendererManager.LoadRenderer(GLContext.GetVif1Packet()); - } - SyncRendererContext(primType); - SyncGsContext(); - - RendererManager.GetCurRenderer().DrawIndexedArrays( primType, numIndices, indices, - numVertices, *VertArray ); - */ - if (Prim != primType) - PrimChanged(primType); + if (Prim != primType) PrimChanged(primType); Geometry.SetPrimType(primType); Geometry.SetArrayType(kIndexed); @@ -260,30 +284,64 @@ void CImmGeomManager::DrawIndexedArrays(GLenum primType, Geometry.SetVertices(VertArray->GetVertices()); Geometry.SetNormals(VertArray->GetNormals()); Geometry.SetTexCoords(VertArray->GetTexCoords()); - Geometry.SetColors(VertArray->GetColors()); + + void* colorsPtr = VertArray->GetColors(); + const bool colorArrayEnabled = VertArray->GetColorsAreValid() && 
VertArray->GetWordsPerColor() == 4; + if (colorArrayEnabled && VertArray->GetColorSrcType() == kColor_UByte) { + float* bufStart = (float*)CurColorBuf->GetNextPtr(); + const unsigned char* srcColorBuf_U8 = (const unsigned char*)colorsPtr; + for (int i = 0; i < numVertices; ++i) { + const unsigned char* colorChannels = srcColorBuf_U8 + 4*i; + *CurColorBuf += (float)colorChannels[0] / 255.0f; + *CurColorBuf += (float)colorChannels[1] / 255.0f; + *CurColorBuf += (float)colorChannels[2] / 255.0f; + *CurColorBuf += (float)colorChannels[3] / 255.0f; + } + colorsPtr = bufStart; + } + Geometry.SetColors(colorsPtr); Geometry.SetVerticesAreValid(VertArray->GetVerticesAreValid()); Geometry.SetNormalsAreValid(VertArray->GetNormalsAreValid()); Geometry.SetTexCoordsAreValid(VertArray->GetTexCoordsAreValid()); - Geometry.SetColorsAreValid(VertArray->GetColorsAreValid()); - Geometry.SetWordsPerVertex(VertArray->GetWordsPerVertex()); - Geometry.SetWordsPerNormal(VertArray->GetWordsPerNormal()); - Geometry.SetWordsPerTexCoord(VertArray->GetWordsPerTexCoord()); - Geometry.SetWordsPerColor(VertArray->GetWordsPerColor()); + const bool arrayHasColors = VertArray->GetColorsAreValid() && (VertArray->GetWordsPerColor() > 0); + + LaneConfig lanes; + lanes.vertices = (VertArray->GetWordsPerVertex() == 4) ? QW_XYZW : + (VertArray->GetWordsPerVertex() == 3) ? QW_XYZ : QW_NONE; + lanes.normals = (VertArray->GetNormalsAreValid() && + VertArray->GetWordsPerNormal() == 3 && + GLContext.GetImmLighting().GetLightingEnabled()) ? QW_XYZ : QW_NONE; + lanes.texcoords = (VertArray->GetTexCoordsAreValid() && + VertArray->GetWordsPerTexCoord() == 2) ? QW_XY : QW_NONE; + lanes.colors = + ((!GLContext.GetImmLighting().GetLightingEnabled() && arrayHasColors) || + ( GLContext.GetImmLighting().GetLightingEnabled() && + GLContext.GetMaterialManager().GetColorMaterialEnabled() && + arrayHasColors)) ? 
QW_XYZW : QW_NONE; + + ValidateLaneConfig(&lanes, "IndexedArraysGeomStage"); + + Geometry.SetWordsPerVertex(QWToWords(lanes.vertices)); + Geometry.SetWordsPerNormal(QWToWords(lanes.normals)); + Geometry.SetWordsPerTexCoord(QWToWords(lanes.texcoords)); + Geometry.SetWordsPerColor(QWToWords(lanes.colors)); + + Geometry.SetColorsAreValid(LanePresent(lanes.colors)); + SyncColorMaterial(LanePresent(lanes.colors)); + RendererManager.PerVtxMaterialChanged(LanePresent(lanes.colors) ? RendererProps::kDiffuse + : RendererProps::kNoMaterial); Geometry.AddVertices(numVertices); Geometry.AddNormals(numVertices); Geometry.AddTexCoords(numVertices); - Geometry.AddColors(numVertices); + if (LanePresent(lanes.colors)) Geometry.AddColors(numVertices); Geometry.SetNumIndices(numIndices); Geometry.SetIndices(indices); Geometry.SetIStripLengths(NULL); - // do this before sync'ing the vu1 renderer in CommitNewGeom - SyncColorMaterial(VertArray->GetColors() != NULL); - CommitNewGeom(); } @@ -291,6 +349,16 @@ void CImmGeomManager::DrawIndexedArrays(GLenum primType, * common and synchronization code */ +void CImmGeomManager::DrawingIndexedArray() +{ + if (!LastArrayAccessIsValid || !LastArrayAccessWasIndexed) { + GLContext.ArrayAccessChanged(); + RendererManager.ArrayAccessChanged(RendererProps::kIndexed); + LastArrayAccessIsValid = true; + } + LastArrayAccessWasIndexed = true; +} + void CImmGeomManager::DrawingLinearArray() { if (!LastArrayAccessIsValid || LastArrayAccessWasIndexed) { @@ -363,12 +431,8 @@ void CImmGeomManager::SyncRenderer() void CImmGeomManager::SyncRendererContext(GLenum primType) { // resend the rendering context if necessary - if (GLContext.GetRendererContextChanged() - || (RendererManager.IsCurRendererCustom() && UserRenderContextChanged)) { - RendererManager.GetCurRenderer().InitContext(primType, - GLContext.GetRendererContextChanged(), - UserRenderContextChanged); - + if (GLContext.GetRendererContextChanged() || (RendererManager.IsCurRendererCustom() && 
UserRenderContextChanged)) { + RendererManager.GetCurRenderer().InitContext(primType, GLContext.GetRendererContextChanged(), UserRenderContextChanged); GLContext.SetRendererContextChanged(false); UserRenderContextChanged = false; Prim = primType; @@ -409,7 +473,8 @@ void CImmGeomManager::SyncGsContext() void CImmGeomManager::SyncColorMaterial(bool pvColorsArePresent) { CMaterialManager& mm = GLContext.GetMaterialManager(); - if (pvColorsArePresent && mm.GetColorMaterialEnabled()) { + //if (pvColorsArePresent && mm.GetColorMaterialEnabled()) { + if (GLContext.GetImmLighting().GetLightingEnabled() && pvColorsArePresent && mm.GetColorMaterialEnabled()) { switch (mm.GetColorMaterialMode()) { case GL_EMISSION: mNotImplemented("Only GL_DIFFUSE can change per-vertex"); diff --git a/src/indexed_renderer.cpp b/src/indexed_renderer.cpp index 075dd21..bf1045d 100644 --- a/src/indexed_renderer.cpp +++ b/src/indexed_renderer.cpp @@ -66,7 +66,7 @@ void CIndexedRenderer::InitContext(GLenum primType, uint32_t rcChanges, bool use if (doLighting) materialEmm = material.GetEmission() * maxColorValue; else - materialEmm = glContext.GetMaterialManager().GetCurColor() * maxColorValue; + materialEmm = glContext.GetMaterialManager().GetCurMatColor() * maxColorValue; ConstantVertColor = materialAmb * globalAmb + materialEmm; } @@ -121,7 +121,7 @@ void CIndexedRenderer::DrawIndexedArrays(CGeometryBlock& block) colors = block.GetColorsAreValid() ? 
block.GetColors(curArray) : NULL; packet.Cnt(); - packet.Stcycl(1, 3).Nop(); + packet.Stcycl(1, InputQuadsPerVert).Nop(); packet.CloseTag(); int numIndices = block.GetNumIndices(curArray); @@ -175,20 +175,21 @@ void CIndexedRenderer::DrawIndexedArrays(CGeometryBlock& block) packet += 0; } packet.CloseUnpack(); - - // constant color of each vertex - - packet.Strow(&ConstantVertColor); - packet.Stcycl(numVertices, 0); - Vifs::tMask mask = { 1, 1, 1, 3, - 1, 1, 1, 3, - 1, 1, 1, 3, - 1, 1, 1, 3 }; - packet.Stmask(mask); - packet.OpenUnpack(Vifs::UnpackModes::v4_32, kTempAreaStart, - Packet::kDoubleBuff, Packet::kMasked); - packet.CloseUnpack(numVertices); - + if (!colors) + { + // constant color of each vertex + + packet.Strow(&ConstantVertColor); + packet.Stcycl(numVertices, 0); + Vifs::tMask mask = { 1, 1, 1, 3, + 1, 1, 1, 3, + 1, 1, 1, 3, + 1, 1, 1, 3 }; + packet.Stmask(mask); + packet.OpenUnpack(Vifs::UnpackModes::v4_32, kTempAreaStart, + Packet::kDoubleBuff, Packet::kMasked); + packet.CloseUnpack(numVertices); + } // start renderer packet.Mscnt(); @@ -196,4 +197,4 @@ void CIndexedRenderer::DrawIndexedArrays(CGeometryBlock& block) } packet.CloseTag(); } -} +} \ No newline at end of file diff --git a/src/material.cpp b/src/material.cpp index dfef9a7..eec318b 100644 --- a/src/material.cpp +++ b/src/material.cpp @@ -107,7 +107,7 @@ void CDListMaterial::SetShininess(float shine) void CMaterialManager::Color(cpu_vec_xyzw color) { - CurColor = color; + CurMatColor = color; if (UseColorMaterial) { switch (ColorMaterialMode) { @@ -152,7 +152,7 @@ void CMaterialManager::SetUseColorMaterial(bool yesNo) if (!InDListDef) { UseColorMaterial = yesNo; if (yesNo) - Color(CurColor); + Color(CurMatColor); GLContext.CurMaterialChanged(); } else { CDList& dlist = GLContext.GetDListManager().GetOpenDList(); @@ -180,7 +180,7 @@ void CMaterialManager::SetColorMaterialMode(GLenum mode) { if (!InDListDef) { ColorMaterialMode = mode; - Color(CurColor); + Color(CurMatColor); 
GLContext.CurMaterialChanged(); } else { CDList& dlist = GLContext.GetDListManager().GetOpenDList(); diff --git a/src/renderermanager.cpp b/src/renderermanager.cpp index dda444c..94d2da1 100644 --- a/src/renderermanager.cpp +++ b/src/renderermanager.cpp @@ -88,6 +88,84 @@ CRendererManager::CRendererManager(CGLContext& context) kInputStart, kInputBufSize - kInputStart, "fast, no lights")); } + { + CRendererProps capabilities = { + .PrimType = kTriangles, + .Lighting = 0, + .NumDirLights = k3DirLights, + .NumPtLights = 0, + .Texture = 1, + .Specular = 0, + .PerVtxMaterial = kNoMaterial, + .Clipping = kNonClipped | kClipped, + .CullFace = 0, + .TwoSidedLighting = 0, + .ArrayAccess = kIndexed + }; + RegisterDefaultRenderer( + new CIndexedRenderer( + mVsmAddr(IndexedConstColor), + mVsmSize(IndexedConstColor), + capabilities, + no_reqs, + 3, + 3, + "indexed, constant color, tri") + ); + } + { + CRendererProps capabilities = { + .PrimType = kTriangles, + .Lighting = 0, + .NumDirLights = k3DirLights, + .NumPtLights = 0, + .Texture = 1, + .Specular = 0, + .PerVtxMaterial = kDiffuse, //TODO: this is just to allow for only certain targets to get pvc (its a hack to get behavior, clean up next + .Clipping = kNonClipped | kClipped, + .CullFace = 0, + .TwoSidedLighting = 0, + .ArrayAccess = kIndexed + }; + RegisterDefaultRenderer( + new CIndexedRenderer( + mVsmAddr(IndexedPVC), + mVsmSize(IndexedPVC), + capabilities, + no_reqs, + 4, + 3, + "indexed, pvc, tri") + ); + } + // unlit renderer per vertex color + { + CRendererProps capabilities = { + .PrimType = kTriangles, + .Lighting = 0, + .NumDirLights = k3DirLights, + .NumPtLights = 0, + .Texture = 0, + .Specular = 0, + .PerVtxMaterial = kDiffuse, //TODO: this is just to allow for only certain targets to get pvc (its a hack to get behavior, clean up next + .Clipping = kNonClipped | kClipped, + .CullFace = 0, + .TwoSidedLighting = 0, + .ArrayAccess = kLinear + }; + RegisterDefaultRenderer( + new CLinearRenderer( + 
mVsmAddr(FastNoLightsPVCTri), + mVsmSize(FastNoLightsPVCTri), + capabilities, + no_reqs, + 4, + 3, + kInputStart, + kInputBufSize - kInputStart, + "linear fast no lights, pvc, tri") + ); + } // fast renderer { CRendererProps capabilities = { diff --git a/vu1/fast_no_lights_pvc_tri.vcl b/vu1/fast_no_lights_pvc_tri.vcl new file mode 100644 index 0000000..535cd16 --- /dev/null +++ b/vu1/fast_no_lights_pvc_tri.vcl @@ -0,0 +1,88 @@ +/* Per-vertex color, no lights, triangles -- based off of fast_no_lights */ + + #include "vu1_mem_linear.h" + + .include "db_in_db_out.i" + .include "math.i" + .include "lighting.i" + .include "clip_cull.i" + .include "geometry.i" + .include "io.i" + .include "general.i" + +kInputQPerV .equ 4 +kOutputQPerV .equ 3 + + .init_vf_all + .init_vi_all + + .name vsmFastNoLightsPVCTri + + --enter + --endenter + + ; ------------------------ initialization --------------------------------- + + load_vert_xfrm vert_xform + + init_constants + + sub trans[0], vf00, vf00 + sub trans[1], vf00, vf00 + sub trans[2], vf00, vf00 + maxw.x trans[0], trans[0], vf00 + maxw.y trans[1], trans[1], vf00 + maxw.z trans[2], trans[2], vf00 + move.xyz trans[3], gs_offsets + move.w trans[3], vf00 + + mul_vec_mat_44 new_xform[0], trans, vert_xform[0] + mul_vec_mat_44 new_xform[1], trans, vert_xform[1] + mul_vec_mat_44 new_xform[2], trans, vert_xform[2] + mul_vec_mat_44 new_xform[3], trans, vert_xform[3] + + --cont + + ; -------------------- transform & texture loop --------------------------- + +main_loop_lid: + + init_io_loop + init_out_buf + + set_strip_adcs + +xform_loop_lid: + --LoopCS 1,3 + + load_vert vert + xform_vert xformed_vert, new_xform, vert + ftoi4.xyz gs_vert, xformed_vert + + load_strip_adc strip_adc + set_adc_s gs_vert, strip_adc + store_xyzf gs_vert + + load_pvcolor vtx_color + loi 255.0 + muli.xyz vtx_color, vtx_color, i + addi.w vtx_color, vf00, i + max vtx_color, vtx_color, vf00 + ftoi0 vtx_color, vtx_color + store_rgba vtx_color + + load_stq tex_stq + 
xform_tex_stq tex_stq, tex_stq, Q + store_stq tex_stq + + next_io + loop_io xform_loop_lid + + ; -------------------- done! ------------------------------- + +done_lid: + kick_to_gs + --cont + b main_loop_lid + +.END diff --git a/vu1/fast_no_lights_pvc_tri_vcl.vsm b/vu1/fast_no_lights_pvc_tri_vcl.vsm new file mode 100644 index 0000000..ecbdb0a --- /dev/null +++ b/vu1/fast_no_lights_pvc_tri_vcl.vsm @@ -0,0 +1,193 @@ +; === __LP__ EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__MAIN_LOOP: +; === ldumb : optimal=14 clid=0 mlid=2 size=(14) +; === normal1 : optimal=14 clid=0 mlid=3 size=(14) +; === hDown : optimal=14 clid=0 mlid=4 size=(14) +; === vuta : optimal=14 clid=0 mlid=2 size=(14) +; === dUp : optimal=14 clid=1 mlid=2 size=(14) +; === normal : optimal=14 clid=0 mlid=4 size=(14) +; === another : optimal=14 clid=0 mlid=4 size=(14) +; ================================================= +; flowMon::Emit() vcl 1.4beta7 produced this code: + .vu + .align 4 + .global vsmFastNoLightsPVCTri_CodeStart + .global vsmFastNoLightsPVCTri_CodeEnd +vsmFastNoLightsPVCTri_CodeStart: +__v_vu1_fast_no_lights_pvc_tri_pp4_vcl_4: +; _LNOPT_w=[ normal2 ] 25 [25 0] 25 [__v_vu1_fast_no_lights_pvc_tri_pp4_vcl_4] + sub VF06,VF00,VF00 lq.w VF07,57(VI00) + sub VF05,VF00,VF00 NOP + sub VF04,VF00,VF00 loi 0x44fff000 + addi.xy VF07,VF00,I NOP + maxw.x VF06,VF06,VF00w mr32.z VF07,VF07 + maxw.y VF05,VF05,VF00w lq VF01,62(VI00) + maxw.z VF04,VF04,VF00w NOP + max.w VF07,VF00,VF00 NOP + max.xyz VF07,VF07,VF07 NOP + mulax ACC,VF06,VF01x lq VF02,63(VI00) + madday ACC,VF05,VF01y NOP + maddaz ACC,VF04,VF01z NOP + maddw VF01,VF07,VF01w NOP + mulax ACC,VF06,VF02x lq VF03,64(VI00) + madday ACC,VF05,VF02y NOP + maddaz ACC,VF04,VF02z NOP + maddw VF02,VF07,VF02w NOP + mulax ACC,VF06,VF03x lq VF08,65(VI00) + madday ACC,VF05,VF03y NOP + maddaz ACC,VF04,VF03z NOP + maddw VF03,VF07,VF03w NOP + mulax ACC,VF06,VF08x NOP + madday ACC,VF05,VF08y NOP + maddaz[E] ACC,VF04,VF08z NOP + maddw VF04,VF07,VF08w NOP 
+main_loop_lid: +; _LNOPT_w=[ normal2 ] 20 [20 0] 20 [main_loop_lid] + NOP xtop VI01 + NOP lq VF05,75(VI00) + NOP ilw.x VI04,0(VI01) + NOP iaddiu VI06,VI01,0x00000001 + NOP iaddiu VI02,VI01,0x00000005 + NOP mtir VI05,VF05x + NOP iadd VI03,VI02,VI04 + NOP ior VI05,VI05,VI04 + NOP iadd VI03,VI03,VI04 + NOP mfir.x VF05,VI05 + NOP iadd VI05,VI03,VI04 + NOP iaddiu VI03,VI01,0 + NOP iadd VI04,VI05,VI04 + NOP sq VF05,236(VI03) + NOP iaddiu VI05,VI01,0x00000005 + NOP iaddiu VI07,VI06,0x00000004 + NOP iaddiu VI08,VI00,0x000003ff + NOP iaddiu VI09,VI00,0x00000800 + NOP iaddiu VI10,VI00,0x00000400 + NOP iaddiu VI11,VI00,0x00000020 +adcLoop_lid: +; _LNOPT_w=[ normal2 ] 7 [13 0] 13 [adcLoop_lid] + NOP lq VF05,0(VI06) + ftoi0 VF05,VF05 NOP ; STALL_LATENCY ?3 + NOP mtir VI12,VF05x ; STALL_LATENCY ?3 + NOP iand VI13,VI12,VI10 + NOP NOP + NOP ibeq VI13,VI10,EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__ENTRY_POINT + NOP NOP +; _LNOPT_w=[ normal2 ] 8 [8 0] 8 [__v_vu1_fast_no_lights_pvc_tri_pp4_vcl_8] + NOP iand VI14,VI12,VI08 + NOP mtir VI13,VF05y + NOP iand VI12,VI12,VI09 + NOP iadd VI14,VI14,VI05 + NOP iand VI15,VI13,VI10 + NOP isw.w VI11,0(VI14) + NOP ibeq VI15,VI10,EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__ENTRY_POINT + NOP isw.w VI12,4(VI14) +; _LNOPT_w=[ normal2 ] 8 [8 0] 8 [__v_vu1_fast_no_lights_pvc_tri_pp4_vcl_9] + NOP iand VI14,VI13,VI08 + NOP mtir VI12,VF05z + NOP iand VI13,VI13,VI09 + NOP iadd VI14,VI14,VI05 + NOP iand VI15,VI12,VI10 + NOP isw.w VI11,0(VI14) + NOP ibeq VI15,VI10,EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__ENTRY_POINT + NOP isw.w VI13,4(VI14) +; _LNOPT_w=[ normal2 ] 8 [8 0] 8 [__v_vu1_fast_no_lights_pvc_tri_pp4_vcl_10] + NOP iand VI14,VI12,VI08 + NOP mtir VI13,VF05w + NOP iand VI12,VI12,VI09 + NOP iadd VI14,VI14,VI05 + NOP iand VI15,VI13,VI10 + NOP isw.w VI11,0(VI14) + NOP ibeq VI15,VI10,EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__ENTRY_POINT + NOP isw.w VI12,4(VI14) +; _LNOPT_w=[ normal2 ] 7 [7 0] 7 
[__v_vu1_fast_no_lights_pvc_tri_pp4_vcl_11] + NOP iand VI12,VI13,VI08 + NOP iaddiu VI06,VI06,0x00000001 + NOP iand VI13,VI13,VI09 + NOP iadd VI12,VI12,VI05 + NOP isw.w VI11,0(VI12) + NOP ibne VI06,VI07,adcLoop_lid + NOP isw.w VI13,4(VI12) +EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__ENTRY_POINT: +; _LNOPT_w=[ ] 8 [12 0] 12 [EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__ENTRY_POINT] + NOP lq.xyz VF05,0(VI02) + mulax ACC,VF01,VF05x iaddiu VI03,VI03,0 ; STALL_LATENCY ?3 + madday ACC,VF02,VF05y lq.xyz VF08,3(VI02) + maddaz ACC,VF03,VF05z iaddiu VI02,VI02,0x00000004 + maddw VF05,VF04,VF00w loi 0x437f0000 + muli.xyz VF08,VF08,I ilw.w VI05,-4(VI02) ; STALL_LATENCY ?1 + addi.w VF08,VF00,I ibeq VI02,VI04,EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__EPI1 + NOP div Q,VF00w,VF05w +; _LNOPT_w=[ ] 14 [12 0] 14 [EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__PRO1] + NOP lq.xyz VF07,0(VI02) + NOP NOP + max VF06,VF08,VF00 NOP + max.xyz VF08,VF05,VF05 NOP + mulax ACC,VF01,VF07x iaddiu VI05,VI05,0x00007fff + madday ACC,VF02,VF07y lq.xyz VF10,3(VI02) + maddaz ACC,VF03,VF07z iaddiu VI02,VI02,0x00000004 + maddw VF05,VF04,VF00w loi 0x437f0000 + mulq.xyz VF09,VF08,Q lq.xyz VF07,-6(VI02) + muli.xyz VF08,VF10,I iaddiu VI03,VI03,0x000000f3 + addi.w VF08,VF00,I mfir.w VF07,VI05 + ftoi0 VF10,VF06 div Q,VF00w,VF05w + mulq.xyz VF06,VF07,Q ibeq VI02,VI04,EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__EPI0 + ftoi4.xyz VF07,VF09 ilw.w VI05,-4(VI02) +EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 14 [14 14] 14 [EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__MAIN_LOOP] + NOP lq.xyz VF09,0(VI02) + NOP sq VF10,-5(VI03) + max VF06,VF08,VF00 sq.xyz VF06,-6(VI03) + max.xyz VF08,VF05,VF05 sq VF07,-4(VI03) + mulax ACC,VF01,VF09x iaddiu VI05,VI05,0x00007fff + madday ACC,VF02,VF09y lq.xyz VF10,3(VI02) + maddaz ACC,VF03,VF09z lq.xyz VF07,-2(VI02) + maddw VF05,VF04,VF00w iaddiu 
VI02,VI02,0x00000004 + mulq.xyz VF09,VF08,Q loi 0x437f0000 + muli.xyz VF08,VF10,I iaddiu VI03,VI03,0x00000003 + addi.w VF08,VF00,I mfir.w VF07,VI05 + ftoi0 VF10,VF06 div Q,VF00w,VF05w + mulq.xyz VF06,VF07,Q ibne VI02,VI04,EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__MAIN_LOOP + ftoi4.xyz VF07,VF09 ilw.w VI05,-4(VI02) +EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__EPI0: +; _LNOPT_w=[ ] 14 [13 0] 16 [EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__EPI0] + NOP NOP + NOP NOP + max VF06,VF08,VF00 sq.xyz VF06,-6(VI03) + max.xyz VF08,VF05,VF05 sq VF07,-4(VI03) + NOP lq.xyz VF07,-2(VI02) + mulq.xyz VF08,VF08,Q NOP ; STALL_LATENCY ?2 + ftoi0 VF10,VF06 sq VF10,-5(VI03) + mulq.xyz VF06,VF07,Q iaddiu VI05,VI05,0x00007fff + NOP mfir.w VF07,VI05 + ftoi4.xyz VF07,VF08 NOP + NOP sq VF10,-2(VI03) + NOP sq.xyz VF06,-3(VI03) + NOP b EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__EXIT_POINT + NOP sq VF07,-1(VI03) +EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__EPI1: +; _LNOPT_w=[ ] 11 [13 0] 16 [EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__EPI1] + NOP NOP + NOP NOP + max VF06,VF08,VF00 NOP + max.xyz VF08,VF05,VF05 lq.xyz VF05,-2(VI02) + mulq.xyz VF08,VF08,Q NOP ; STALL_LATENCY ?3 + ftoi0 VF07,VF06 iaddiu VI05,VI05,0x00007fff + mulq.xyz VF06,VF05,Q mfir.w VF08,VI05 + ftoi4.xyz VF08,VF08 NOP ; STALL_LATENCY ?1 + NOP sq VF07,238(VI03) + NOP sq.xyz VF06,237(VI03) + NOP sq VF08,239(VI03) ; STALL_LATENCY ?1 +EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__EXIT_POINT] + NOP iaddiu VI01,VI01,0x000000ec + NOP xgkick VI01 + NOP[E] NOP + NOP NOP +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_fast_no_lights_pvc_tri_pp4_vcl_15] + NOP b main_loop_lid + NOP NOP + .align 4 +vsmFastNoLightsPVCTri_CodeEnd: +; iCount=150 +; register stats: +; 16 VU User integer +; 11 VU User floating point diff --git a/vu1/general.i b/vu1/general.i 
index d197f31..51c315e 100644 --- a/vu1/general.i +++ b/vu1/general.i @@ -85,13 +85,13 @@ sqrt q, \atten[z] addw.x \atten, vf00, vf00 addq.y \atten, vf00, q - div q, vf00w, \atten[y] + div q, vf00[w], \atten[y] mulq.xyz \vert_to_light, \vert_to_light, q dot3_to_w \atten, \atten, \atten_coeff .endm .macro atten_color output, vert_color, atten - div q, vf00w, \atten[w] + div q, vf00[w], \atten[w] mulq.xyz \output, \vert_color, q .endm diff --git a/vu1/geometry.i b/vu1/geometry.i index 8abf73d..44afa4b 100644 --- a/vu1/geometry.i +++ b/vu1/geometry.i @@ -97,7 +97,7 @@ .macro xform_vert xformed_vert, vert_xform, vert mul_pt_mat_44 \xformed_vert, \vert_xform, \vert - div q, vf00w, \xformed_vert[w] + div q, vf00[w], \xformed_vert[w] mulq.xyz \xformed_vert, \xformed_vert, q ; FIXME: visible vertices are now in range (+-320, +-112, +-2^24-1) .endm diff --git a/vu1/indexed.vcl b/vu1/indexed.vcl index 678b3f9..ad7d257 100644 --- a/vu1/indexed.vcl +++ b/vu1/indexed.vcl @@ -164,6 +164,31 @@ done_lighting_lid: iaddiu input_start, buffer_top, kInputGeomStart iaddiu color_start, buffer_top, kTempAreaStart + ; If lighting is OFF (no dir lights AND no point lights), seed the + ; per-vertex color accumulation buffer at kTempAreaStart with a constant + ; color = emission + (globalAmbient ⊙ materialAmbient). + ; This makes the later "lq.xyz vert_color, (next_color)" path valid even + ; when the lighting loops above were skipped entirely. 
+ + ibne num_dir_lights, vi00, colors_ready_lid + ibne num_pt_lights, vi00, colors_ready_lid + + ; Load vertex count and set write ptr to the temp color buffer + ilw.x num_vertices, kNumVertices(buffer_top) + iaddiu next_color_acc, buffer_top, kTempAreaStart + + ; Build the constant base color once + get_cnst_color vert_color + + ; for (seed_i = 0; seed_i < num_vertices; ++seed_i) + iaddiu seed_i, vi00, 0 +seed_const_colors_lid: + sqi.xyz vert_color, (next_color_acc++) + iaddiu seed_i, seed_i, 1 + ibne seed_i, num_vertices, seed_const_colors_lid + +colors_ready_lid: + ; set up index-decompression iaddiu next_index, buffer_top, kInputGeomStart diff --git a/vu1/indexed_constant_color.vcl b/vu1/indexed_constant_color.vcl new file mode 100644 index 0000000..007bccc --- /dev/null +++ b/vu1/indexed_constant_color.vcl @@ -0,0 +1,116 @@ + #include "vu1_mem_indexed.h" + + .include "db_in_sb_out.i" + .include "math.i" + .include "lighting.i" + .include "clip_cull.i" + .include "geometry.i" + .include "io.i" + .include "general.i" + +kInputQPerV .equ 3 +kOutputQPerV .equ 3 + + .init_vf_all + .init_vi_all + + .name vsmIndexedConstColor + + --enter + --endenter + + load_vert_xfrm vert_xform + + --cont + +main_loop_lid: + init_constants + init_clip_cnst + + xtop buffer_top + iaddiu next_output, vi00, kOutputGeomStart + iaddiu input_start, buffer_top, kInputGeomStart + iaddiu color_start, buffer_top, kTempAreaStart ; CPU prefilled RGB per vertex + + iaddiu next_index, buffer_top, kInputGeomStart + iaddiu first_index_mask, vi00, 0xff + loi 253.0 + maxi.w index_constants, vf00, i + loi 3.0 + maxi.z index_constants, vf00, i + loi 255.0 + maxi.y index_constants, vf00, i + + ilw.y num_indices_d2, kNumIndicesD2(buffer_top) + iadd last_index, next_index, num_indices_d2 + ilw.z num_indices, kNumIndices(buffer_top) + + lq gif_tag, kGifTag(vi00) + mtir eop, gif_tagx + ior eop, eop, num_indices + mfir.x gif_tag, eop + mfir.w gif_tag, next_output + sq gif_tag, kOutputBufStart(vi00) + + loi 
255.0 ; TODO: this is a flag or like control logicked during VIF packing actually?? + load_mat_diff vert_color, w + muli.w vert_color, vert_color, i + minii.w vert_color, vert_color, i + ftoi0.w vert_color, vert_color + + + + iaddiu zero_giftag, vi00, kGifTag + xgkick zero_giftag + +xform_loop_lid: --LoopCS 1,3 + ilw.w first_index, 0(next_index) + iand first_index, first_index, first_index_mask + iadd first_offset, first_index, first_index + iadd first_offset, first_offset, first_index + + lqi.w indices, (next_index++) + addy.w second_ind, indices, index_constants[y] + mtir second_index, second_ind[w] + mulz.w second_off, indices, index_constants[z] + add.w second_off, second_off, index_constants[w] + mtir second_offset, second_off[w] + + .macro do_vert + load_vert vert + xform_vert xformed_vert, vert_xform, vert + vert_to_gs gs_vert, xformed_vert + clip_vert xformed_vert + fcand vi01, 0x003ffff + iand vi01, vi01, do_clipping + set_adc_fs gs_vert, vi00 + store_xyzf gs_vert + + lq.xyz vert_color, (next_color) + muli.xyz vert_color, vert_color, i + minii.xyz vert_color, vert_color, i + ftoi0.xyz vert_color, vert_color + store_rgba vert_color + + + load_stq tex_stq + xform_tex_stq tex_stq, tex_stq, q + store_stq tex_stq + .endm + + iadd next_input, first_offset, input_start + iadd next_color, first_index, color_start + do_vert + + iadd next_input, second_offset, input_start + iadd next_color, second_index, color_start + iaddiu next_output, next_output, kOutputQPerV + do_vert + + next_o + ibne next_index, last_index, xform_loop_lid + kick_to_gs + +--cont + b main_loop_lid +.END diff --git a/vu1/indexed_constant_color_vcl.vsm b/vu1/indexed_constant_color_vcl.vsm new file mode 100644 index 0000000..80c1a3c --- /dev/null +++ b/vu1/indexed_constant_color_vcl.vsm @@ -0,0 +1,154 @@ +; === __LP__ EXPL_vu1_indexed_constant_color_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; === ldumb : optimal=38 clid=0 mlid=1 size=(38) +; === vuta : optimal=38 clid=0 mlid=2 size=(38) +; === dUp : 
optimal=38 clid=0 mlid=1 size=(38) +; ================================================= +; flowMon::Emit() vcl 1.4beta7 produced this code: + .vu + .align 4 + .global vsmIndexedConstColor_CodeStart + .global vsmIndexedConstColor_CodeEnd +vsmIndexedConstColor_CodeStart: +__v_vu1_indexed_constant_color_pp4_vcl_4: +; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_indexed_constant_color_pp4_vcl_4] + NOP lq VF01,62(VI00) + NOP lq VF02,63(VI00) + NOP lq VF03,64(VI00) + NOP[E] lq VF04,65(VI00) + NOP NOP +main_loop_lid: +; _LNOPT_w=[ another ] 55 [49 0] 59 [main_loop_lid] + NOP xtop VI04 + NOP lq.xyz VF07,75(VI00) + NOP ilw.z VI06,0(VI04) + NOP iaddiu VI03,VI00,0x0000004e + NOP mfir.w VF07,VI03 + NOP mtir VI07,VF07x + NOP ior VI06,VI07,VI06 + NOP mfir.x VF07,VI06 + NOP loi 0x437f0000 + maxi.y VF10,VF00,I loi 0x40400000 + maxi.z VF09,VF00,I loi 0x45000000 + NOP sq VF07,77(VI00) + maxi.w VF07,VF00,I loi 0x44fff000 + addi.xy VF05,VF00,I loi 0x437d0000 + NOP lq.w VF06,60(VI00) + NOP lq.w VF05,57(VI00) + NOP ilw.y VI08,0(VI04) + NOP iaddiu VI01,VI00,0x0000004b + NOP iaddiu VI06,VI04,0x00000005 + maxi.w VF08,VF00,I loi 0x437f0000 + muli.w VF06,VF06,I iadd VI08,VI06,VI08 + NOP mr32.z VF05,VF05 + NOP xgkick VI01 + NOP lqi.w VF05,(VI06++) + minii.w VF06,VF06,I ilw.w VI01,-1(VI06) + NOP iaddiu VI07,VI00,0x000000ff + NOP iaddiu VI05,VI04,0x000000ac + NOP iaddiu VI04,VI04,0x00000005 + ftoi0.w VF11,VF06 iand VI10,VI01,VI07 + addy.w VF06,VF05,VF10y iadd VI01,VI10,VI10 + mulz.w VF05,VF05,VF09z iadd VI01,VI01,VI10 + NOP iadd VI01,VI01,VI04 + NOP lq.xyz VF08,0(VI01) + NOP mtir VI09,VF06w + add.w VF05,VF05,VF08 NOP + mulax ACC,VF01,VF08x iadd VI10,VI10,VI05 ; STALL_LATENCY ?1 + madday ACC,VF02,VF08y lq.xyz VF16,0(VI10) + maddaz ACC,VF03,VF08z mtir VI11,VF05w + maddw VF12,VF04,VF00w lq.xyz VF08,2(VI01) + NOP iadd VI01,VI11,VI04 + muli.xyz VF11,VF16,I lq.xyz VF16,0(VI01) + NOP div Q,VF00w,VF12w ; STALL_LATENCY ?1 + mulax ACC,VF01,VF16x NOP ; STALL_LATENCY ?1 + madday ACC,VF02,VF16y NOP + maddaz 
ACC,VF03,VF16z NOP + maddw VF13,VF04,VF00w lq.xyz VF06,76(VI00) + NOP NOP + mulq.xyz VF16,VF12,Q iadd VI09,VI09,VI05 + minii.xyz VF14,VF11,I lq.xyz VF11,0(VI09) + NOP div Q,VF00w,VF13w + add.xyz VF12,VF16,VF05 NOP ; STALL_LATENCY ?1 + mul.xyz VF16,VF16,VF06 ilw.w VI02,76(VI00) + muli.xyz VF15,VF11,I fcset 0 + ftoi0.xyz VF11,VF14 ibeq VI06,VI08,EXPL_vu1_indexed_constant_color_pp4_vcl_xform_loop_lid__EPI0 + mulq.xyz VF08,VF08,Q lq.xyz VF07,2(VI01) +EXPL_vu1_indexed_constant_color_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 38 [38 38] 38 [EXPL_vu1_indexed_constant_color_pp4_vcl_xform_loop_lid__MAIN_LOOP] + clipw.xyz VF16xyz,VF07w lqi.w VF05,(VI06++) + NOP ilw.w VI09,-1(VI06) + minii.xyz VF11,VF15,I sq VF11,1(VI03) + NOP sq.xyz VF08,0(VI03) + addy.w VF06,VF05,VF10y fcand VI01,262143 + mulz.w VF05,VF05,VF09z iand VI11,VI09,VI07 + mulq.xyz VF08,VF13,Q iadd VI09,VI11,VI11 + NOP iadd VI09,VI09,VI11 + NOP mtir VI10,VF06w + add.w VF05,VF05,VF08 iadd VI09,VI09,VI04 + add.xyz VF13,VF08,VF05 iadd VI11,VI11,VI05 + mul.xyz VF15,VF08,VF06 lq.xyz VF08,0(VI09) + NOP iand VI12,VI01,VI02 + NOP mtir VI01,VF05w + ftoi4.xyz VF14,VF12 ior VI12,VI12,VI00 + mulax ACC,VF01,VF08x iaddiu VI12,VI12,0x00007fff + madday ACC,VF02,VF08y mfir.w VF14,VI12 + maddaz ACC,VF03,VF08z lq.xyz VF16,0(VI11) + maddw VF12,VF04,VF00w lq.xyz VF08,2(VI09) + clipw.xyz VF15xyz,VF07w iadd VI09,VI01,VI04 + NOP sq VF14,2(VI03) + muli.xyz VF14,VF16,I lq.xyz VF16,0(VI09) + mulq.xyz VF07,VF07,Q div Q,VF00w,VF12w + ftoi4.xyz VF15,VF13 iadd VI11,VI10,VI05 + ftoi0.xyz VF11,VF11 iaddiu VI10,VI03,0 + minii.xyz VF14,VF14,I fcand VI01,262143 + mulax ACC,VF01,VF16x iand VI03,VI01,VI02 + madday ACC,VF02,VF16y ior VI03,VI03,VI00 + maddaz ACC,VF03,VF16z iaddiu VI03,VI03,0x00007fff + mulq.xyz VF16,VF12,Q mfir.w VF15,VI03 + NOP sq VF11,4(VI10) + NOP lq.xyz VF11,0(VI11) + maddw VF13,VF04,VF00w iaddiu VI03,VI10,0x00000006 + add.xyz VF12,VF16,VF05 sq VF15,5(VI10) + mul.xyz VF16,VF16,VF06 sq.xyz VF07,3(VI10) + muli.xyz 
VF15,VF11,I lq.xyz VF07,2(VI09) + ftoi0.xyz VF11,VF14 ibne VI06,VI08,EXPL_vu1_indexed_constant_color_pp4_vcl_xform_loop_lid__MAIN_LOOP + mulq.xyz VF08,VF08,Q div Q,VF00w,VF13w +EXPL_vu1_indexed_constant_color_pp4_vcl_xform_loop_lid__EPI0: +; _LNOPT_w=[ ] 28 [21 0] 30 [EXPL_vu1_indexed_constant_color_pp4_vcl_xform_loop_lid__EPI0] + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + clipw.xyz VF16xyz,VF07w sq VF11,1(VI03) + mulq.xyz VF08,VF13,Q sq.xyz VF08,0(VI03) + NOP NOP + NOP NOP + minii.xyz VF11,VF15,I fcand VI01,262143 + mul.xyz VF15,VF08,VF06 iand VI04,VI01,VI02 + NOP ior VI04,VI04,VI00 + ftoi4.xyz VF12,VF12 iaddiu VI04,VI04,0x00007fff + NOP mfir.w VF12,VI04 + clipw.xyz VF15xyz,VF07w iaddiu VI04,VI03,0 + add.xyz VF13,VF08,VF05 sq VF12,2(VI03) ; STALL_LATENCY ?2 + NOP fcand VI01,262143 + mulq.xyz VF07,VF07,Q iand VI03,VI01,VI02 + ftoi0.xyz VF11,VF11 ior VI03,VI03,VI00 + ftoi4.xyz VF15,VF13 iaddiu VI03,VI03,0x00007fff + NOP mfir.w VF15,VI03 + NOP sq.xyz VF07,3(VI04) + NOP sq VF11,4(VI04) + NOP iaddiu VI01,VI00,0x0000004d + NOP sq VF15,5(VI04) + NOP xgkick VI01 + NOP[E] NOP + NOP NOP +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_indexed_constant_color_pp4_vcl_9] + NOP b main_loop_lid + NOP NOP + .align 4 +vsmIndexedConstColor_CodeEnd: +; iCount=128 +; register stats: +; 13 VU User integer +; 17 VU User floating point diff --git a/vu1/indexed_no_lights_pvc.vcl b/vu1/indexed_no_lights_pvc.vcl new file mode 100644 index 0000000..cd79be8 --- /dev/null +++ b/vu1/indexed_no_lights_pvc.vcl @@ -0,0 +1,106 @@ + #include "vu1_mem_indexed.h" + + .include "db_in_sb_out.i" + .include "math.i" + .include "lighting.i" + .include "clip_cull.i" + .include "geometry.i" + .include "io.i" + .include "general.i" + +kInputQPerV .equ 4 +kOutputQPerV .equ 3 + + .init_vf_all + .init_vi_all + + .name vsmIndexedPVC + + --enter + --endenter + + load_vert_xfrm vert_xform + + --cont + +main_loop_lid: + init_constants + init_clip_cnst + + xtop buffer_top + iaddiu next_output, vi00, 
kOutputGeomStart + iaddiu input_start, buffer_top, kInputGeomStart + + iaddiu next_index, buffer_top, kInputGeomStart + iaddiu first_index_mask, vi00, 0xff + loi 253.0 + maxi.w index_constants, vf00, i + loi 4.0 + maxi.z index_constants, vf00, i + loi 255.0 + maxi.y index_constants, vf00, i + + ilw.y num_indices_d2, kNumIndicesD2(buffer_top) + iadd last_index, next_index, num_indices_d2 + ilw.z num_indices, kNumIndices(buffer_top) + + lq gif_tag, kGifTag(vi00) + mtir eop, gif_tagx + ior eop, eop, num_indices + mfir.x gif_tag, eop + mfir.w gif_tag, next_output + sq gif_tag, kOutputBufStart(vi00) + + iaddiu zero_giftag, vi00, kGifTag + xgkick zero_giftag + +xform_loop_lid: --LoopCS 1,3 + ilw.w first_index, 0(next_index) + iand first_index, first_index, first_index_mask + iadd first_offset, first_index, first_index + iadd first_offset, first_offset, first_offset + + lqi.w indices, (next_index++) + addy.w second_ind, indices, index_constants[y] + mtir second_index, second_ind[w] + mulz.w second_off, indices, index_constants[z] + add.w second_off, second_off, index_constants[w] + mtir second_offset, second_off[w] + + .macro do_vert + load_vert vert + xform_vert xformed_vert, vert_xform, vert + vert_to_gs gs_vert, xformed_vert + clip_vert xformed_vert + fcand vi01, 0x003ffff + iand vi01, vi01, do_clipping + set_adc_fs gs_vert, vi00 + store_xyzf gs_vert + + iaddiu color_qw, next_input, kColorQwOff ;TODO: this probably as a macro, but just emphatic for now... 
+ lq vert_color, 0(color_qw) + loi 255.0 + muli vert_color, vert_color, i + max vert_color, vert_color, vf00 + ftoi0 vert_color, vert_color + store_rgba vert_color + + load_stq tex_stq + xform_tex_stq tex_stq, tex_stq, q + store_stq tex_stq + .endm + + iadd next_input, first_offset, input_start + do_vert + + iadd next_input, second_offset, input_start + iaddiu next_output, next_output, kOutputQPerV + do_vert + + next_o + ibne next_index, last_index, xform_loop_lid + kick_to_gs + +--cont + b main_loop_lid +.END diff --git a/vu1/indexed_no_lights_pvc_vcl.vsm b/vu1/indexed_no_lights_pvc_vcl.vsm new file mode 100644 index 0000000..d6fe2c0 --- /dev/null +++ b/vu1/indexed_no_lights_pvc_vcl.vsm @@ -0,0 +1,146 @@ +; === __LP__ EXPL_vu1_indexed_no_lights_pvc_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; === ldumb : optimal=39 clid=0 mlid=1 size=(39) +; === normal1 : optimal=39 clid=0 mlid=2 size=(39) +; === vuta : optimal=39 clid=0 mlid=1 size=(39) +; === vuta1 : optimal=39 clid=0 mlid=2 size=(39) +; ================================================= +; flowMon::Emit() vcl 1.4beta7 produced this code: + .vu + .align 4 + .global vsmIndexedPVC_CodeStart + .global vsmIndexedPVC_CodeEnd +vsmIndexedPVC_CodeStart: +__v_vu1_indexed_no_lights_pvc_pp4_vcl_4: +; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_indexed_no_lights_pvc_pp4_vcl_4] + NOP lq VF01,62(VI00) + NOP lq VF02,63(VI00) + NOP lq VF03,64(VI00) + NOP[E] lq VF04,65(VI00) + NOP NOP +main_loop_lid: +; _LNOPT_w=[ normal2 ] 50 [48 0] 55 [main_loop_lid] + NOP loi 0x44fff000 + addi.xy VF05,VF00,I loi 0x45000000 + NOP xtop VI04 + NOP iaddiu VI03,VI00,0x0000004e + NOP iaddiu VI05,VI04,0x00000005 + NOP lq.xyz VF10,75(VI00) + NOP ilw.z VI06,0(VI04) + NOP ilw.y VI07,0(VI04) + NOP mfir.w VF10,VI03 + NOP mtir VI08,VF10x + NOP ior VI06,VI08,VI06 + NOP mfir.x VF10,VI06 + maxi.w VF07,VF00,I loi 0x437d0000 + NOP iaddiu VI01,VI00,0x0000004b + NOP iadd VI07,VI05,VI07 + NOP sq VF10,77(VI00) + NOP xgkick VI01 + NOP lqi.w VF05,(VI05++) + NOP ilw.w 
VI01,-1(VI05) + maxi.w VF08,VF00,I loi 0x40800000 + maxi.z VF09,VF00,I iaddiu VI04,VI04,0x00000005 + NOP iaddiu VI06,VI00,0x000000ff + NOP iand VI01,VI01,VI06 + NOP iadd VI01,VI01,VI01 + mulz.w VF05,VF05,VF09z iadd VI01,VI01,VI01 + NOP iadd VI01,VI01,VI04 + NOP lq.xyz VF08,0(VI01) + add.w VF05,VF05,VF08 iaddiu VI09,VI01,0 ; STALL_LATENCY ?1 + mulax ACC,VF01,VF08x lq VF12,3(VI09) ; STALL_LATENCY ?1 + madday ACC,VF02,VF08y loi 0x437f0000 + maddaz ACC,VF03,VF08z mtir VI08,VF05w + maddw VF10,VF04,VF00w lq.xyz VF08,2(VI01) + muli VF12,VF12,I iadd VI01,VI08,VI04 + NOP lq.xyz VF07,0(VI01) + NOP div Q,VF00w,VF10w ; STALL_LATENCY ?1 + max VF12,VF12,VF00 fcset 0 + mulax ACC,VF01,VF07x lq.xyz VF06,76(VI00) + madday ACC,VF02,VF07y lq.w VF06,57(VI00) + maddaz ACC,VF03,VF07z NOP + maddw VF11,VF04,VF00w NOP + mulq.xyz VF13,VF10,Q mr32.z VF05,VF06 ; STALL_LATENCY ?1 + ftoi0 VF12,VF12 div Q,VF00w,VF11w ; STALL_LATENCY ?1 + mulq.xyz VF07,VF08,Q ilw.w VI02,76(VI00) + add.xyz VF08,VF13,VF05 iaddiu VI08,VI01,0 + mul.xyz VF13,VF13,VF06 loi 0x437f0000 + NOP lq VF10,3(VI08) + NOP sq VF12,1(VI03) + NOP sq.xyz VF07,0(VI03) + clipw.xyz VF13xyz,VF07w ibeq VI05,VI07,EXPL_vu1_indexed_no_lights_pvc_pp4_vcl_xform_loop_lid__EPI0 + NOP lq.xyz VF07,2(VI01) +EXPL_vu1_indexed_no_lights_pvc_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 39 [38 38] 39 [EXPL_vu1_indexed_no_lights_pvc_pp4_vcl_xform_loop_lid__MAIN_LOOP] + NOP lqi.w VF05,(VI05++) + mulq.xyz VF12,VF11,Q ilw.w VI08,-1(VI05) + NOP fcand VI01,262143 + NOP iand VI01,VI01,VI02 + mulz.w VF05,VF05,VF09z ior VI01,VI01,VI00 + add.xyz VF11,VF12,VF05 iand VI08,VI08,VI06 + mul.xyz VF13,VF12,VF06 iadd VI08,VI08,VI08 + muli VF12,VF10,I iadd VI08,VI08,VI08 + add.w VF05,VF05,VF08 iadd VI08,VI08,VI04 + ftoi4.xyz VF10,VF08 lq.xyz VF08,0(VI08) + clipw.xyz VF13xyz,VF07w iaddiu VI01,VI01,0x00007fff + NOP mfir.w VF10,VI01 + max VF12,VF12,VF00 mtir VI01,VF05w + mulax ACC,VF01,VF08x iaddiu VI09,VI08,0 + madday ACC,VF02,VF08y lq VF13,3(VI09) + maddaz 
ACC,VF03,VF08z sq VF10,2(VI03) + maddw VF10,VF04,VF00w loi 0x437f0000 + NOP lq.xyz VF08,2(VI08) + muli VF13,VF13,I iadd VI08,VI01,VI04 + NOP lq.xyz VF14,0(VI08) + ftoi0 VF12,VF12 div Q,VF00w,VF10w + ftoi4.xyz VF11,VF11 fcand VI01,262143 + max VF13,VF13,VF00 iand VI01,VI01,VI02 + mulax ACC,VF01,VF14x ior VI01,VI01,VI00 + madday ACC,VF02,VF14y iaddiu VI01,VI01,0x00007fff + maddaz ACC,VF03,VF14z mfir.w VF11,VI01 + mulq.xyz VF14,VF07,Q NOP + mulq.xyz VF15,VF10,Q iaddiu VI01,VI08,0 + mulq.xyz VF07,VF08,Q lq VF10,3(VI01) + maddw VF11,VF04,VF00w sq VF11,5(VI03) + ftoi0 VF13,VF13 loi 0x437f0000 + add.xyz VF08,VF15,VF05 sq VF12,4(VI03) + mul.xyz VF12,VF15,VF06 sq.xyz VF14,3(VI03) + NOP div Q,VF00w,VF11w + NOP iaddiu VI03,VI03,0x00000006 + NOP sq.xyz VF07,0(VI03) + clipw.xyz VF12xyz,VF07w sq VF13,1(VI03) + NOP ibne VI05,VI07,EXPL_vu1_indexed_no_lights_pvc_pp4_vcl_xform_loop_lid__MAIN_LOOP + NOP lq.xyz VF07,2(VI08) +EXPL_vu1_indexed_no_lights_pvc_pp4_vcl_xform_loop_lid__EPI0: +; _LNOPT_w=[ ] 23 [21 0] 26 [EXPL_vu1_indexed_no_lights_pvc_pp4_vcl_xform_loop_lid__EPI0] + NOP NOP + mulq.xyz VF09,VF11,Q NOP + NOP fcand VI01,262143 + add.xyz VF11,VF09,VF05 iand VI01,VI01,VI02 ; STALL_LATENCY ?2 + mul.xyz VF05,VF09,VF06 ior VI01,VI01,VI00 + NOP iaddiu VI01,VI01,0x00007fff + muli VF06,VF10,I mfir.w VF10,VI01 + ftoi4.xyz VF10,VF08 NOP + clipw.xyz VF05xyz,VF07w NOP + max VF08,VF06,VF00 NOP ; STALL_LATENCY ?1 + NOP sq VF10,2(VI03) + NOP fcand VI01,262143 + mulq.xyz VF07,VF07,Q iand VI02,VI01,VI02 + ftoi0 VF10,VF08 ior VI02,VI02,VI00 + ftoi4.xyz VF11,VF11 iaddiu VI02,VI02,0x00007fff + NOP mfir.w VF11,VI02 + NOP sq.xyz VF07,3(VI03) + NOP sq VF10,4(VI03) + NOP iaddiu VI01,VI00,0x0000004d + NOP sq VF11,5(VI03) + NOP xgkick VI01 + NOP[E] NOP + NOP NOP +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_indexed_no_lights_pvc_pp4_vcl_9] + NOP b main_loop_lid + NOP NOP + .align 4 +vsmIndexedPVC_CodeEnd: +; iCount=119 +; register stats: +; 10 VU User integer +; 16 VU User floating point diff --git 
a/vu1/indexed_vcl.vsm b/vu1/indexed_vcl.vsm index ba7a6c8..c61d2ec 100644 --- a/vu1/indexed_vcl.vsm +++ b/vu1/indexed_vcl.vsm @@ -204,10 +204,10 @@ EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: NOP ilw.x VI03,0(VI02) pt_lights_lid: ; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] - NOP ilw.y VI01,0(VI00) - NOP iaddiu VI02,VI00,0x00000001 - NOP ibeq VI01,VI00,done_lighting_lid ; STALL_LATENCY ?2 - NOP ilw.y VI03,0(VI02) + NOP ilw.y VI02,0(VI00) + NOP iaddiu VI03,VI00,0x00000001 + NOP ibeq VI02,VI00,done_lighting_lid ; STALL_LATENCY ?2 + NOP ilw.y VI04,0(VI03) ; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_indexed_pp4_vcl_13] maxw.z VF09,VF00,VF00w NOP pt_light_loop_lid: @@ -215,19 +215,19 @@ pt_light_loop_lid: NOP NOP NOP NOP NOP lq.xyz VF10,71(VI00) - NOP lq.xyz VF11,3(VI03) + NOP lq.xyz VF11,3(VI04) NOP lq.xyz VF14,72(VI00) mulax.xyz ACC,VF10,VF11x lq.xyz VF13,73(VI00) ; STALL_LATENCY ?2 madday.xyz ACC,VF14,VF11y lq.xyz VF14,74(VI00) - NOP xtop VI06 - maddaz.xyz ACC,VF13,VF11z iaddiu VI04,VI06,0x00000005 ; STALL_LATENCY ?1 - maddw.xyz VF14,VF14,VF00w lq.xyz VF17,0(VI04) - sub.xyz VF17,VF14,VF17 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 - mul.xyz VF16,VF17,VF17 lq.xyz VF12,2(VI03) ; STALL_LATENCY ?3 - adday.z ACC,VF16,VF16y lq.xyz VF11,1(VI03) ; STALL_LATENCY ?3 - maddx.z VF16,VF09,VF16x iadd VI07,VI04,VI05 - NOP iadd VI07,VI07,VI05 - NOP iadd VI05,VI07,VI05 + NOP xtop VI07 + maddaz.xyz ACC,VF13,VF11z iaddiu VI05,VI07,0x00000005 ; STALL_LATENCY ?1 + maddw.xyz VF14,VF14,VF00w lq.xyz VF17,0(VI05) + sub.xyz VF17,VF14,VF17 ilw.x VI06,0(VI07) ; STALL_LATENCY ?3 + mul.xyz VF16,VF17,VF17 lq.xyz VF12,2(VI04) ; STALL_LATENCY ?3 + adday.z ACC,VF16,VF16y lq.xyz VF11,1(VI04) ; STALL_LATENCY ?3 + maddx.z VF16,VF09,VF16x iadd VI08,VI05,VI06 + NOP iadd VI08,VI08,VI06 + NOP iadd VI06,VI08,VI06 NOP sqrt Q,VF16z ; STALL_LATENCY ?1 NOP NOP NOP NOP @@ -235,22 +235,22 @@ pt_light_loop_lid: NOP NOP NOP NOP NOP NOP - addq.y VF16,VF00,Q lq.xyz VF10,0(VI03) + addq.y VF16,VF00,Q 
lq.xyz VF10,0(VI04) NOP div Q,VF00w,VF16y ; STALL_LATENCY ?3 NOP NOP NOP NOP NOP NOP NOP NOP NOP NOP - addw.x VF16,VF00,VF00w lq.xyz VF13,5(VI03) - mulq.xyz VF17,VF17,Q iaddiu VI04,VI04,0x00000003 + addw.x VF16,VF00,VF00w lq.xyz VF13,5(VI04) + mulq.xyz VF17,VF17,Q iaddiu VI05,VI05,0x00000003 NOP NOP - mul.xyz VF12,VF12,VF07 ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2 - mul.xyz VF16,VF16,VF13 iaddiu VI06,VI06,0x000000ac + mul.xyz VF12,VF12,VF07 ibeq VI05,VI06,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2 + mul.xyz VF16,VF16,VF13 iaddiu VI07,VI07,0x000000ac ; _LNOPT_w=[ ] 21 [38 0] 39 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__PRO1] - add.xyz VF18,VF08,VF17 lq.xyz VF19,0(VI04) + add.xyz VF18,VF08,VF17 lq.xyz VF19,0(VI05) sub.xyz VF19,VF14,VF19 esadd P,VF18 ; STALL_LATENCY ?3 - mul.xyz VF21,VF19,VF19 lq.xyz VF20,-2(VI04) ; STALL_LATENCY ?3 + mul.xyz VF21,VF19,VF19 lq.xyz VF20,-2(VI05) ; STALL_LATENCY ?3 adday.z ACC,VF21,VF21y NOP ; STALL_LATENCY ?3 maddx.z VF21,VF09,VF21x NOP NOP waitp @@ -265,12 +265,12 @@ pt_light_loop_lid: NOP NOP mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 mul.xyz VF21,VF17,VF20 NOP - mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 + mulq.xyz VF17,VF19,Q iaddiu VI05,VI05,0x00000003 NOP move.xyz VF19,VF18 - NOP ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1 + NOP ibeq VI05,VI06,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1 mulax.w ACC,VF00,VF21x mfp.w VF06,P ; _LNOPT_w=[ ] 32 [38 0] 39 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__PRO2] - add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI04) + add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI05) NOP NOP NOP NOP mulw.xyz VF22,VF19,VF06w NOP @@ -293,18 +293,18 @@ pt_light_loop_lid: addq.y VF21,VF00,Q waitq mul.w VF06,VF06,VF06 div Q,VF00w,VF21y ; STALL_LATENCY ?3 addw.x VF21,VF00,VF00w NOP - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) + mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI05) maddz.w VF05,VF00,VF15z NOP mul.w VF06,VF06,VF06 NOP mul.xyz 
VF16,VF21,VF13 move.xyz VF15,VF16 mul.xyz VF21,VF17,VF20 NOP - mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 + mulq.xyz VF17,VF19,Q iaddiu VI05,VI05,0x00000003 mul.w VF07,VF06,VF06 move.xyz VF19,VF18 - NOP ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0 + NOP ibeq VI05,VI06,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0 mulax.w ACC,VF00,VF21x mfp.w VF06,P EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: ; _LPOPT_w=[ vuta ] 39 [38 34] 39 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] - add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI04) + add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI05) mul.w VF07,VF07,VF07 NOP madday.w ACC,VF00,VF21y NOP mulw.xyz VF22,VF19,VF06w NOP @@ -320,7 +320,7 @@ EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: maddx.z VF21,VF09,VF21x NOP madday.w ACC,VF00,VF20y NOP maddz.w VF07,VF00,VF20z mfp.w VF06,P - mulq.xyz VF23,VF22,Q lq.xyz VF22,0(VI06) + mulq.xyz VF23,VF22,Q lq.xyz VF22,0(VI07) mulax.w ACC,VF00,VF15x sqrt Q,VF21z madday.w ACC,VF00,VF15y NOP maxx.w VF07,VF07,VF00x NOP @@ -329,9 +329,9 @@ EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: addw.x VF21,VF00,VF00w NOP mul.w VF06,VF07,VF07 NOP addq.y VF21,VF00,Q NOP - mula.xyz ACC,VF20,VF06 sqi.xyz VF22,(VI06++) + mula.xyz ACC,VF20,VF06 sqi.xyz VF22,(VI07++) maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 - mul.w VF06,VF06,VF06 lq.xyz VF20,-2(VI04) + mul.w VF06,VF06,VF06 lq.xyz VF20,-2(VI05) mul.xyz VF16,VF21,VF13 div Q,VF00w,VF21y NOP NOP NOP NOP @@ -339,9 +339,9 @@ EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: mul.xyz VF21,VF17,VF20 NOP NOP NOP NOP NOP - mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 + mulq.xyz VF17,VF19,Q iaddiu VI05,VI05,0x00000003 mul.w VF07,VF06,VF06 move.xyz VF19,VF18 - NOP ibne VI04,VI05,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP + NOP ibne VI05,VI06,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP mulax.w ACC,VF00,VF21x mfp.w VF06,P 
EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0: ; _LNOPT_w=[ ] 60 [93 0] 93 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0] @@ -366,12 +366,12 @@ EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0: maxx.w VF07,VF07,VF00x NOP mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulq.xyz VF19,VF21,Q lq.xyz VF21,0(VI06) + mulq.xyz VF19,VF21,Q lq.xyz VF21,0(VI07) mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) + mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI05) add.xyz VF21,VF21,VF19 move.xyz VF19,VF13 mul.w VF07,VF06,VF06 mfp.w VF06,P ; STALL_LATENCY ?1 - mul.xyz VF21,VF17,VF20 sqi.xyz VF21,(VI06++) ; STALL_LATENCY ?1 + mul.xyz VF21,VF17,VF20 sqi.xyz VF21,(VI07++) ; STALL_LATENCY ?1 mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?1 maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 mul.w VF07,VF07,VF07 NOP @@ -395,16 +395,16 @@ EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0: mulax.w ACC,VF00,VF15x NOP madday.w ACC,VF00,VF15y NOP mul.w VF07,VF06,VF06 NOP - maddz.w VF05,VF00,VF15z lq.xyz VF13,0(VI06) + maddz.w VF05,VF00,VF15z lq.xyz VF13,0(VI07) mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 add.xyz VF11,VF13,VF17 div Q,VF00w,VF05w maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 madd.xyz VF10,VF10,VF05 NOP - NOP sqi.xyz VF11,(VI06++) - mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI06) ; STALL_LATENCY ?2 + NOP sqi.xyz VF11,(VI07++) + mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI07) ; STALL_LATENCY ?2 add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 NOP b EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sqi.xyz VF10,(VI06++) ; STALL_LATENCY ?2 + NOP sqi.xyz VF10,(VI07++) ; STALL_LATENCY ?2 EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1: ; _LNOPT_w=[ ] 53 [93 0] 93 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1] add.xyz VF13,VF08,VF17 NOP @@ -423,7 +423,7 @@ EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1: mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 mul.w VF06,VF06,VF06 
NOP ; STALL_LATENCY ?3 mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) + mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI05) NOP waitp ; STALL_LATENCY ?1 mul.w VF07,VF06,VF06 mfp.w VF06,P mul.xyz VF21,VF17,VF20 NOP @@ -450,19 +450,19 @@ EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1: mulax.w ACC,VF00,VF15x NOP madday.w ACC,VF00,VF15y NOP mul.w VF07,VF06,VF06 NOP - maddz.w VF05,VF00,VF15z lq.xyz VF13,0(VI06) + maddz.w VF05,VF00,VF15z lq.xyz VF13,0(VI07) mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 add.xyz VF11,VF13,VF17 div Q,VF00w,VF05w maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 madd.xyz VF10,VF10,VF05 NOP - NOP sqi.xyz VF11,(VI06++) - mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI06) ; STALL_LATENCY ?2 + NOP sqi.xyz VF11,(VI07++) + mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI07) ; STALL_LATENCY ?2 add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 NOP b EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sqi.xyz VF10,(VI06++) ; STALL_LATENCY ?2 + NOP sqi.xyz VF10,(VI07++) ; STALL_LATENCY ?2 EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2: ; _LNOPT_w=[ ] 49 [93 0] 93 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2] - add.xyz VF13,VF08,VF17 lq.xyz VF14,-2(VI04) + add.xyz VF13,VF08,VF17 lq.xyz VF14,-2(VI05) mul.xyz VF17,VF17,VF14 esadd P,VF13 ; STALL_LATENCY ?3 mulax.w ACC,VF00,VF17x move.xyz VF16,VF16 ; STALL_LATENCY ?3 madday.w ACC,VF00,VF17y NOP @@ -508,211 +508,237 @@ EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2: NOP div Q,VF00w,VF05w maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 madd.xyz VF10,VF10,VF05 NOP - mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI06) ; STALL_LATENCY ?3 + mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI07) ; STALL_LATENCY ?3 add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 - NOP sqi.xyz VF10,(VI06++) ; STALL_LATENCY ?3 + NOP sqi.xyz VF10,(VI07++) ; STALL_LATENCY ?3 EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: ; _LNOPT_w=[ ] 4 [4 0] 4 
[EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,pt_light_loop_lid - NOP ilw.y VI03,0(VI02) + NOP isubiu VI02,VI02,0x00000001 + NOP iaddiu VI03,VI03,0x00000001 + NOP ibne VI02,VI00,pt_light_loop_lid + NOP ilw.y VI04,0(VI03) done_lighting_lid: -; _LNOPT_w=[ vuta ] 51 [50 0] 58 [done_lighting_lid] - NOP lq.w VF05,60(VI00) - NOP loi 0x43000000 - muli.w VF10,VF05,I xtop VI04 ; STALL_LATENCY ?2 - NOP ilw.y VI08,0(VI04) - NOP loi 0x437f0000 - maxi.w VF12,VF00,I ilw.z VI03,0(VI04) - minii.w VF10,VF10,I loi 0x437f0000 - NOP lq.xyz VF05,75(VI00) +; _LNOPT_w=[ normal2 ] 12 [12 0] 12 [done_lighting_lid] + NOP loi 0x44fff000 + NOP lq.w VF06,57(VI00) + addi.xy VF05,VF00,I loi 0x45000000 + NOP xtop VI04 + NOP fcset 0 + NOP mr32.z VF05,VF06 + NOP lq.xyz VF06,76(VI00) + NOP ilw.w VI03,76(VI00) + NOP iaddiu VI05,VI00,0x0000004e NOP iaddiu VI06,VI04,0x00000005 - maxi.y VF10,VF00,I loi 0x40400000 - NOP mtir VI02,VF05x ; STALL_LATENCY ?1 - NOP ior VI03,VI02,VI03 - NOP mfir.x VF05,VI03 - NOP iaddiu VI03,VI00,0x0000004e - NOP mfir.w VF05,VI03 - maxi.z VF09,VF00,I loi 0x437d0000 - NOP iadd VI08,VI06,VI08 - NOP ilw.w VI09,0(VI06) - NOP sq VF05,77(VI00) - NOP lqi.w VF05,(VI06++) - NOP iaddiu VI05,VI04,0x000000ac - NOP iaddiu VI04,VI04,0x00000005 - NOP iaddiu VI07,VI00,0x000000ff - NOP iand VI09,VI09,VI07 - maxi.w VF08,VF00,I iadd VI01,VI09,VI09 - addy.w VF06,VF05,VF10y iadd VI01,VI01,VI09 - mulz.w VF05,VF05,VF09z iadd VI10,VI01,VI04 - NOP lq.xyz VF11,0(VI10) - add.w VF05,VF05,VF08 lq.w VF09,57(VI00) ; STALL_LATENCY ?2 - mulax ACC,VF01,VF11x loi 0x45000000 - madday ACC,VF02,VF11y NOP - maddaz ACC,VF03,VF11z iadd VI09,VI09,VI05 - maddw VF13,VF04,VF00w lq.xyz VF07,0(VI09) - NOP mtir VI11,VF05w - NOP div Q,VF00w,VF13w ; STALL_LATENCY ?2 - NOP iadd VI09,VI11,VI04 - NOP lq.xyz VF12,0(VI09) - maxi.w VF07,VF00,I mr32.z VF05,VF09 - miniw.xyz VF11,VF07,VF12w loi 0x44fff000 - addi.xy VF05,VF00,I 
iaddiu VI02,VI00,0x0000004b - mulax ACC,VF01,VF12x xgkick VI02 - mulq.xyz VF08,VF13,Q ilw.w VI02,76(VI00) - madday ACC,VF02,VF12y lq.xyz VF06,76(VI00) - maddaz ACC,VF03,VF12z fcset 0 - maddw VF16,VF04,VF00w mtir VI01,VF06w - add.xyz VF13,VF08,VF05 lq.xyz VF14,2(VI10) - mul.xyz VF08,VF08,VF06 iadd VI01,VI01,VI05 + NOP ibne VI01,VI00,colors_ready_lid + maxi.w VF07,VF00,I iaddiu VI07,VI04,0x000000ac +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_indexed_pp4_vcl_19] + NOP ibne VI02,VI00,colors_ready_lid + NOP NOP +; _LNOPT_w=[ normal2 ] 6 [9 0] 10 [__v_vu1_indexed_pp4_vcl_20] + NOP lq.xyz VF08,57(VI00) + NOP lq.xyz VF09,59(VI00) + mul.xyz VF08,VF08,VF09 lq.xyz VF07,58(VI00) ; STALL_LATENCY ?3 + NOP ilw.x VI01,0(VI04) + NOP iaddiu VI02,VI04,0x000000ac + add.xyz VF07,VF07,VF08 iaddiu VI08,VI00,0 ; STALL_LATENCY ?1 +seed_const_colors_lid: +; _LNOPT_w=[ normal2 ] 4 [4 3] 4 [seed_const_colors_lid] + NOP iaddiu VI08,VI08,0x00000001 + NOP NOP + NOP ibne VI08,VI01,seed_const_colors_lid + NOP sqi.xyz VF07,(VI02++) +colors_ready_lid: +; _LNOPT_w=[ vuta1 ] 47 [40 0] 53 [colors_ready_lid] + NOP loi 0x40400000 + maxi.z VF07,VF00,I loi 0x437f0000 + NOP lq.w VF05,60(VI00) + NOP lq.xyz VF09,75(VI00) + NOP ilw.z VI02,0(VI04) + maxi.y VF08,VF00,I loi 0x43000000 + NOP ilw.y VI09,0(VI04) + NOP mtir VI08,VF09x + NOP ior VI02,VI08,VI02 + muli.w VF05,VF05,I loi 0x437f0000 + NOP mfir.x VF09,VI02 + NOP mfir.w VF09,VI05 + NOP iaddiu VI02,VI04,0x00000005 + NOP iadd VI09,VI02,VI09 + NOP iaddiu VI04,VI00,0x0000004b + NOP sq VF09,77(VI00) + NOP xgkick VI04 + NOP ilw.w VI04,0(VI02) + minii.w VF08,VF05,I lqi.w VF14,(VI02++) + maxi.w VF06,VF00,I loi 0x437d0000 + NOP iaddiu VI08,VI00,0x000000ff + NOP iand VI04,VI04,VI08 + addy.w VF12,VF14,VF08y iadd VI01,VI04,VI04 + maxi.w VF05,VF00,I iadd VI01,VI01,VI04 + mulz.w VF11,VF14,VF07z iadd VI10,VI01,VI06 + NOP lq.xyz VF09,0(VI10) + add.w VF11,VF11,VF05 NOP ; STALL_LATENCY ?2 + mulax ACC,VF01,VF09x NOP + madday ACC,VF02,VF09y NOP + maddaz ACC,VF03,VF09z iadd 
VI04,VI04,VI07 + maddw VF13,VF04,VF00w mtir VI11,VF11w + NOP lq.xyz VF09,0(VI04) + NOP iadd VI04,VI11,VI06 + NOP lq.xyz VF12,0(VI04) + NOP div Q,VF00w,VF13w + miniw.xyz VF09,VF09,VF06w NOP + mulax ACC,VF01,VF12x NOP ; STALL_LATENCY ?1 + madday ACC,VF02,VF12y NOP + maddaz ACC,VF03,VF12z NOP + mulq.xyz VF11,VF13,Q waitq ; STALL_LATENCY ?1 + maddw VF16,VF04,VF00w mtir VI01,VF12w + add.xyz VF13,VF11,VF05 lq.xyz VF14,2(VI10) ; STALL_LATENCY ?2 + mul.xyz VF11,VF11,VF06 iadd VI01,VI01,VI07 NOP lq.xyz VF12,0(VI01) - ftoi0.w VF11,VF10 div Q,VF00w,VF16w - ftoi0.xyz VF11,VF11 ibeq VI06,VI08,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI1 - clipw.xyz VF08xyz,VF07w lq.xyz VF08,2(VI09) + ftoi0.w VF09,VF08 div Q,VF00w,VF16w + ftoi0.xyz VF09,VF09 ibeq VI02,VI09,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI1 + clipw.xyz VF11xyz,VF07w lq.xyz VF11,2(VI04) ; _LNOPT_w=[ ] 38 [37 0] 38 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__PRO1] - mulq.xyz VF15,VF14,Q ilw.w VI11,0(VI06) + mulq.xyz VF15,VF14,Q ilw.w VI11,0(VI02) NOP NOP - NOP lqi.w VF05,(VI06++) - NOP sq VF11,1(VI03) - mulq.xyz VF11,VF16,Q fcand VI01,262143 - NOP iand VI11,VI11,VI07 + NOP lqi.w VF14,(VI02++) + NOP sq VF09,1(VI05) + mulq.xyz VF09,VF16,Q fcand VI01,262143 + NOP iand VI11,VI11,VI08 NOP iadd VI10,VI11,VI11 - addy.w VF06,VF05,VF10y iadd VI10,VI10,VI11 - mulz.w VF05,VF05,VF09z iadd VI12,VI10,VI04 - mul.xyz VF07,VF11,VF06 iadd VI11,VI11,VI05 - add.xyz VF14,VF11,VF05 lq.xyz VF11,0(VI12) - ftoi4.xyz VF16,VF13 iand VI09,VI01,VI02 - add.w VF05,VF05,VF08 ior VI09,VI09,VI00 - clipw.xyz VF07xyz,VF07w iaddiu VI01,VI09,0x00007fff - mulax ACC,VF01,VF11x mfir.w VF16,VI01 - madday ACC,VF02,VF11y mtir VI10,VF06w - maddaz ACC,VF03,VF11z mtir VI09,VF05w - maddw VF13,VF04,VF00w lq.xyz VF07,0(VI11) - mulq.xyz VF16,VF08,Q sq VF16,2(VI03) - NOP iadd VI11,VI09,VI04 - NOP sq.xyz VF15,0(VI03) + mulz.w VF11,VF14,VF07z iadd VI10,VI10,VI11 + mul.xyz VF10,VF09,VF06 iadd VI12,VI10,VI06 + add.xyz VF14,VF09,VF05 iadd VI11,VI11,VI07 + addy.w 
VF12,VF14,VF08y lq.xyz VF09,0(VI12) + ftoi4.xyz VF16,VF13 iand VI04,VI01,VI03 + add.w VF11,VF11,VF05 ior VI04,VI04,VI00 + clipw.xyz VF10xyz,VF07w iaddiu VI01,VI04,0x00007fff + mulax ACC,VF01,VF09x mfir.w VF16,VI01 + madday ACC,VF02,VF09y mtir VI10,VF12w + maddaz ACC,VF03,VF09z mtir VI04,VF11w + maddw VF13,VF04,VF00w lq.xyz VF10,0(VI11) + mulq.xyz VF16,VF11,Q sq VF16,2(VI05) + NOP iadd VI11,VI04,VI06 + NOP sq.xyz VF15,0(VI05) NOP div Q,VF00w,VF13w - miniw.xyz VF11,VF07,VF12w iaddiu VI09,VI03,0 - miniw.xyz VF07,VF12,VF12w lq.xyz VF12,0(VI11) + miniw.xyz VF09,VF10,VF06w iaddiu VI04,VI05,0 + miniw.xyz VF10,VF12,VF06w lq.xyz VF12,0(VI11) NOP fcand VI01,262143 ftoi4.xyz VF15,VF14 lq.xyz VF14,2(VI12) - NOP iand VI03,VI01,VI02 - mulax ACC,VF01,VF12x ior VI03,VI03,VI00 - mulq.xyz VF08,VF13,Q iaddiu VI03,VI03,0x00007fff - madday ACC,VF02,VF12y mfir.w VF15,VI03 - maddaz ACC,VF03,VF12z iadd VI10,VI10,VI05 - maddw VF16,VF04,VF00w sq.xyz VF16,3(VI09) - add.xyz VF13,VF08,VF05 iaddiu VI03,VI09,0x00000006 - mul.xyz VF08,VF08,VF06 lq.xyz VF12,0(VI10) - NOP sq VF15,5(VI09) + NOP iand VI05,VI01,VI03 + mulax ACC,VF01,VF12x ior VI05,VI05,VI00 + mulq.xyz VF11,VF13,Q iaddiu VI05,VI05,0x00007fff + madday ACC,VF02,VF12y mfir.w VF15,VI05 + maddaz ACC,VF03,VF12z iadd VI10,VI10,VI07 + maddw VF16,VF04,VF00w sq.xyz VF16,3(VI04) + add.xyz VF13,VF11,VF05 iaddiu VI05,VI04,0x00000006 + mul.xyz VF11,VF11,VF06 lq.xyz VF12,0(VI10) + NOP sq VF15,5(VI04) NOP div Q,VF00w,VF16w - ftoi0.xyz VF11,VF11 ibeq VI06,VI08,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI0 - clipw.xyz VF08xyz,VF07w lq.xyz VF08,2(VI11) + ftoi0.xyz VF09,VF09 ibeq VI02,VI09,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI0 + clipw.xyz VF11xyz,VF07w lq.xyz VF11,2(VI11) EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP: ; _LPOPT_w=[ ldumb ] 38 [38 38] 38 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP] - NOP ilw.w VI11,0(VI06) - NOP lqi.w VF05,(VI06++) - NOP sq VF11,1(VI03) + NOP ilw.w VI11,0(VI02) + NOP lqi.w VF14,(VI02++) + NOP sq 
VF09,1(VI05) mulq.xyz VF15,VF14,Q fcand VI01,262143 - mulq.xyz VF11,VF16,Q iand VI11,VI11,VI07 - addy.w VF06,VF05,VF10y iadd VI10,VI11,VI11 - mulz.w VF05,VF05,VF09z iadd VI10,VI10,VI11 - NOP iadd VI12,VI10,VI04 - add.xyz VF14,VF11,VF05 iadd VI11,VI11,VI05 - mul.xyz VF18,VF11,VF06 mtir VI10,VF06w - add.w VF05,VF05,VF08 lq.xyz VF17,0(VI12) - ftoi4.xyz VF16,VF13 iand VI01,VI01,VI02 - ftoi0.xyz VF11,VF07 ior VI01,VI01,VI00 + mulq.xyz VF09,VF16,Q iand VI11,VI11,VI08 + addy.w VF12,VF14,VF08y iadd VI10,VI11,VI11 + mulz.w VF11,VF14,VF07z iadd VI10,VI10,VI11 + NOP iadd VI12,VI10,VI06 + add.xyz VF14,VF09,VF05 iadd VI11,VI11,VI07 + mul.xyz VF18,VF09,VF06 mtir VI10,VF12w + add.w VF11,VF11,VF05 lq.xyz VF17,0(VI12) + ftoi4.xyz VF16,VF13 iand VI01,VI01,VI03 + ftoi0.xyz VF09,VF10 ior VI01,VI01,VI00 clipw.xyz VF18xyz,VF07w iaddiu VI13,VI01,0x00007fff - mulax ACC,VF01,VF17x mtir VI01,VF05w + mulax ACC,VF01,VF17x mtir VI01,VF11w madday ACC,VF02,VF17y mfir.w VF16,VI13 - maddaz ACC,VF03,VF17z lq.xyz VF07,0(VI11) - maddw VF13,VF04,VF00w iadd VI11,VI01,VI04 - NOP sq.xyz VF15,0(VI03) - NOP sq VF16,2(VI03) - miniw.xyz VF11,VF07,VF12w sq VF11,4(VI09) + maddaz ACC,VF03,VF17z lq.xyz VF10,0(VI11) + maddw VF13,VF04,VF00w iadd VI11,VI01,VI06 + NOP sq.xyz VF15,0(VI05) + NOP sq VF16,2(VI05) + miniw.xyz VF09,VF10,VF06w sq VF09,4(VI04) NOP div Q,VF00w,VF13w - miniw.xyz VF07,VF12,VF12w iaddiu VI09,VI03,0 + miniw.xyz VF10,VF12,VF06w iaddiu VI04,VI05,0 NOP lq.xyz VF12,0(VI11) NOP fcand VI01,262143 ftoi4.xyz VF15,VF14 lq.xyz VF14,2(VI12) - mulq.xyz VF16,VF08,Q iand VI03,VI01,VI02 - mulax ACC,VF01,VF12x ior VI03,VI03,VI00 - mulq.xyz VF08,VF13,Q iaddiu VI03,VI03,0x00007fff - madday ACC,VF02,VF12y mfir.w VF15,VI03 - maddaz ACC,VF03,VF12z iadd VI10,VI10,VI05 - maddw VF16,VF04,VF00w sq.xyz VF16,3(VI09) - add.xyz VF13,VF08,VF05 iaddiu VI03,VI09,0x00000006 - mul.xyz VF08,VF08,VF06 lq.xyz VF12,0(VI10) - NOP sq VF15,5(VI09) + mulq.xyz VF16,VF11,Q iand VI05,VI01,VI03 + mulax ACC,VF01,VF12x ior VI05,VI05,VI00 + 
mulq.xyz VF11,VF13,Q iaddiu VI05,VI05,0x00007fff + madday ACC,VF02,VF12y mfir.w VF15,VI05 + maddaz ACC,VF03,VF12z iadd VI10,VI10,VI07 + maddw VF16,VF04,VF00w sq.xyz VF16,3(VI04) + add.xyz VF13,VF11,VF05 iaddiu VI05,VI04,0x00000006 + mul.xyz VF11,VF11,VF06 lq.xyz VF12,0(VI10) + NOP sq VF15,5(VI04) NOP div Q,VF00w,VF16w - ftoi0.xyz VF11,VF11 ibne VI06,VI08,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP - clipw.xyz VF08xyz,VF07w lq.xyz VF08,2(VI11) + ftoi0.xyz VF09,VF09 ibne VI02,VI09,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP + clipw.xyz VF11xyz,VF07w lq.xyz VF11,2(VI11) EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI0: ; _LNOPT_w=[ ] 25 [21 0] 26 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI0] - mulq.xyz VF09,VF14,Q NOP + mulq.xyz VF07,VF14,Q NOP NOP NOP NOP NOP NOP NOP - mulq.xyz VF11,VF16,Q sq VF11,1(VI03) + mulq.xyz VF09,VF16,Q sq VF09,1(VI05) NOP NOP NOP fcand VI01,262143 - NOP iand VI01,VI01,VI02 + NOP iand VI01,VI01,VI03 ftoi4.xyz VF16,VF13 ior VI01,VI01,VI00 - mul.xyz VF06,VF11,VF06 iaddiu VI01,VI01,0x00007fff - add.xyz VF14,VF11,VF05 mfir.w VF16,VI01 - ftoi0.xyz VF11,VF07 NOP - clipw.xyz VF06xyz,VF07w sq.xyz VF09,0(VI03) ; STALL_LATENCY ?1 - mulq.xyz VF16,VF08,Q sq VF16,2(VI03) - miniw.xyz VF07,VF12,VF12w sq VF11,4(VI09) - NOP iaddiu VI09,VI03,0 + mul.xyz VF06,VF09,VF06 iaddiu VI01,VI01,0x00007fff + add.xyz VF14,VF09,VF05 mfir.w VF16,VI01 + ftoi0.xyz VF09,VF10 NOP + clipw.xyz VF06xyz,VF07w sq.xyz VF07,0(VI05) ; STALL_LATENCY ?1 + mulq.xyz VF16,VF11,Q sq VF16,2(VI05) + miniw.xyz VF10,VF12,VF06w sq VF09,4(VI04) + NOP iaddiu VI04,VI05,0 NOP fcand VI01,262143 - NOP iand VI03,VI01,VI02 - ftoi0.xyz VF11,VF07 ior VI03,VI03,VI00 - ftoi4.xyz VF12,VF14 iaddiu VI03,VI03,0x00007fff - NOP mfir.w VF12,VI03 - NOP sq.xyz VF16,3(VI09) - NOP sq VF11,4(VI09) + NOP iand VI05,VI01,VI03 + ftoi0.xyz VF09,VF10 ior VI05,VI05,VI00 + ftoi4.xyz VF12,VF14 iaddiu VI05,VI05,0x00007fff + NOP mfir.w VF12,VI05 + NOP sq.xyz VF16,3(VI04) + NOP sq VF09,4(VI04) NOP b 
EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EXIT_POINT - NOP sq VF12,5(VI09) + NOP sq VF12,5(VI04) EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI1: ; _LNOPT_w=[ ] 23 [21 0] 26 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI1] mulq.xyz VF07,VF14,Q NOP NOP NOP - NOP sq VF11,1(VI03) + NOP sq VF09,1(VI05) NOP fcand VI01,262143 - mulq.xyz VF11,VF16,Q iand VI04,VI01,VI02 - NOP ior VI04,VI04,VI00 - ftoi4.xyz VF16,VF13 iaddiu VI04,VI04,0x00007fff - NOP mfir.w VF16,VI04 - add.xyz VF14,VF11,VF05 sq.xyz VF07,0(VI03) - mul.xyz VF11,VF11,VF06 NOP - mulq.xyz VF16,VF08,Q sq VF16,2(VI03) ; STALL_LATENCY ?1 - clipw.xyz VF11xyz,VF07w NOP ; STALL_LATENCY ?1 + mulq.xyz VF09,VF16,Q iand VI02,VI01,VI03 + NOP ior VI02,VI02,VI00 + ftoi4.xyz VF16,VF13 iaddiu VI02,VI02,0x00007fff + NOP mfir.w VF16,VI02 + add.xyz VF14,VF09,VF05 sq.xyz VF07,0(VI05) + mul.xyz VF09,VF09,VF06 NOP + mulq.xyz VF16,VF11,Q sq VF16,2(VI05) ; STALL_LATENCY ?1 + clipw.xyz VF09xyz,VF07w NOP ; STALL_LATENCY ?1 NOP NOP NOP NOP - miniw.xyz VF07,VF12,VF12w iaddiu VI04,VI03,0 + miniw.xyz VF09,VF12,VF06w iaddiu VI02,VI05,0 NOP fcand VI01,262143 - NOP iand VI03,VI01,VI02 - ftoi4.xyz VF12,VF14 ior VI03,VI03,VI00 - ftoi0.xyz VF11,VF07 iaddiu VI03,VI03,0x00007fff - NOP mfir.w VF12,VI03 - NOP sq.xyz VF16,3(VI04) - NOP sq VF11,4(VI04) ; STALL_LATENCY ?1 - NOP sq VF12,5(VI04) + NOP iand VI05,VI01,VI03 + ftoi4.xyz VF12,VF14 ior VI05,VI05,VI00 + ftoi0.xyz VF09,VF09 iaddiu VI05,VI05,0x00007fff + NOP mfir.w VF12,VI05 + NOP sq.xyz VF16,3(VI02) + NOP sq VF09,4(VI02) ; STALL_LATENCY ?1 + NOP sq VF12,5(VI02) EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EXIT_POINT: ; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EXIT_POINT] NOP iaddiu VI01,VI00,0x0000004d NOP xgkick VI01 NOP[E] NOP NOP NOP -; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_indexed_pp4_vcl_21] +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_indexed_pp4_vcl_26] NOP b main_loop_lid NOP NOP .align 4 vsmIndexed_CodeEnd: -; iCount=637 +; iCount=657 ; register stats: ; 14 VU User 
integer ; 24 VU User floating point diff --git a/vu1/io.i b/vu1/io.i index 1ac0571..8e2271e 100644 --- a/vu1/io.i +++ b/vu1/io.i @@ -13,7 +13,7 @@ ; fill in the nloop field in the giftag and store at ; top of the output buffer lq gif_tag\@, kGifTag(vi00) - mtir eop\@, gif_tag\@x + mtir eop\@, gif_tag\@[x] ior eop\@, eop\@, num_verts mfir.x gif_tag\@, eop\@ sq gif_tag\@, -1(next_output) diff --git a/vu1/math.i b/vu1/math.i index 53cb771..ad0762c 100644 --- a/vu1/math.i +++ b/vu1/math.i @@ -39,7 +39,7 @@ .endm .macro normalize_3 output, input, scalar - div q, vf00w, \scalar + div q, vf00[w], \scalar mulq.xyz \output, \input, q .endm diff --git a/vu1/vu1_context.i b/vu1/vu1_context.i new file mode 100644 index 0000000..41478f6 --- /dev/null +++ b/vu1/vu1_context.i @@ -0,0 +1,54 @@ +tLightPtrs_dir .equ 0 +tLightPtrs_point .equ 1 +tLightPtrs_spot .equ 2 +tLightPtrs_dummy .equ 3 + +kNumLights .equ kContextStart +kBackFaceCullMult .equ kNumLights + +kLightPointers0 .equ (kNumLights + 1) +kLightPointers1 .equ (kLightPointers0 + 1) +kLightPointers2 .equ (kLightPointers1 + 1) +kLightPointers3 .equ (kLightPointers2 + 1) +kLightPointers4 .equ (kLightPointers3 + 1) +kLightPointers5 .equ (kLightPointers4 + 1) +kLightPointers6 .equ (kLightPointers5 + 1) +kLightPointers7 .equ (kLightPointers6 + 1) + +kLightAmbientOffset .equ 0 +kLightDiffuseOffset .equ 1 +kLightSpecularOffset .equ 2 +kLightPosOffset .equ 3 +kLightSpotDirOffset .equ 4 +kLightAttenCoeffOffset .equ 5 + +kLightStructSize .equ 6 + +kLight0Base .equ (kLightPointers7 + 1) +kLight1Base .equ (kLight0Base + kLightStructSize) +kLight2Base .equ (kLight1Base + kLightStructSize) +kLight3Base .equ (kLight2Base + kLightStructSize) +kLight4Base .equ (kLight3Base + kLightStructSize) +kLight5Base .equ (kLight4Base + kLightStructSize) +kLight6Base .equ (kLight5Base + kLightStructSize) +kLight7Base .equ (kLight6Base + kLightStructSize) + +kGlobalAmbient .equ (kLight7Base + kLightStructSize) + +kClipToGsDepthOffset .equ kGlobalAmbient 
+ +kMaterialEmission .equ (kGlobalAmbient + 1) +kMaterialAmbient .equ (kMaterialEmission + 1) +kMaterialDiffuse .equ (kMaterialAmbient + 1) +kMaterialSpecular .equ (kMaterialDiffuse + 1) + +kVertexXfrm .equ (kMaterialSpecular + 1) +kFixedVertToEye .equ (kVertexXfrm + 4) +kObjToWorldXfrmTrans .equ (kFixedVertToEye + 1) +kWorldToObjXfrm .equ (kObjToWorldXfrmTrans + 4) + +kGifTag .equ (kWorldToObjXfrm + 4) + +kClipInfo .equ (kGifTag + 1) + +kContextLength .equ (kClipInfo - kContextStart + 1) diff --git a/vu1/vu1_mem_indexed.h b/vu1/vu1_mem_indexed.h index a4a3165..c214af0 100644 --- a/vu1/vu1_mem_indexed.h +++ b/vu1/vu1_mem_indexed.h @@ -57,3 +57,4 @@ #define kInputStart kInputGeomStart // temp #define kTempAreaStart (kInputGeomStart + kInputBufSize) +#define kColorQwOff 3 diff --git a/vu1/vu1_mem_linear.i b/vu1/vu1_mem_linear.i new file mode 100644 index 0000000..5d7a9e8 --- /dev/null +++ b/vu1/vu1_mem_linear.i @@ -0,0 +1,17 @@ +kContextStart .equ 0 + +.include "vu1_context.i" + +kDoubleBufBase .equ (kContextStart + kContextLength) +kDoubleBufOffset .equ ((1024 - kDoubleBufBase) / 2) +kDoubleBufSize .equ kDoubleBufOffset + +kNumVertices .equ 0 + +kStripADCs .equ (kNumVertices + 1) + +kInputStart .equ (kStripADCs + 4) + +kInputBufSize .equ (kDoubleBufSize / 2) +kOutputStart .equ (0 + kInputBufSize) +kOutputBufSize .equ (kDoubleBufSize - kOutputStart) diff --git a/vu1/vu1renderers.h b/vu1/vu1renderers.h index 9c26952..d9a42a0 100644 --- a/vu1/vu1renderers.h +++ b/vu1/vu1renderers.h @@ -30,8 +30,11 @@ VU_FUNCTIONS(GeneralPVDiffQuad); VU_FUNCTIONS(SCEI); VU_FUNCTIONS(Fast); VU_FUNCTIONS(FastNoLights); +VU_FUNCTIONS(FastNoLightsPVCTri); VU_FUNCTIONS(Indexed); +VU_FUNCTIONS(IndexedConstColor); +VU_FUNCTIONS(IndexedPVC); } #endif // ps2gl_vu1code_h