open-goal-jak-project/game/graphics/opengl_renderer/background/background_common.cpp

787 lines
28 KiB
C++

#include "background_common.h"
#ifdef __aarch64__
#include "third-party/sse2neon/sse2neon.h"
#else
#include <immintrin.h>
#endif
#include "common/util/os.h"
#include "game/graphics/opengl_renderer/BucketRenderer.h"
#include "game/graphics/pipelines/opengl.h"
DoubleDraw setup_opengl_from_draw_mode(DrawMode mode, u32 tex_unit, bool mipmap) {
glActiveTexture(tex_unit);
if (mode.get_zt_enable()) {
glEnable(GL_DEPTH_TEST);
switch (mode.get_depth_test()) {
case GsTest::ZTest::NEVER:
glDepthFunc(GL_NEVER);
break;
case GsTest::ZTest::ALWAYS:
glDepthFunc(GL_ALWAYS);
break;
case GsTest::ZTest::GEQUAL:
glDepthFunc(GL_GEQUAL);
break;
case GsTest::ZTest::GREATER:
glDepthFunc(GL_GREATER);
break;
default:
ASSERT(false);
}
} else {
glDisable(GL_DEPTH_TEST);
}
DoubleDraw double_draw;
bool should_enable_blend = false;
if (mode.get_ab_enable() && mode.get_alpha_blend() != DrawMode::AlphaBlend::DISABLED) {
should_enable_blend = true;
switch (mode.get_alpha_blend()) {
case DrawMode::AlphaBlend::SRC_SRC_SRC_SRC:
should_enable_blend = false;
// (SRC - SRC) * alpha + SRC = SRC, no blend.
break;
case DrawMode::AlphaBlend::SRC_DST_SRC_DST:
glBlendEquation(GL_FUNC_ADD);
glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ZERO);
break;
case DrawMode::AlphaBlend::SRC_0_SRC_DST:
glBlendEquation(GL_FUNC_ADD);
glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE, GL_ONE, GL_ZERO);
break;
case DrawMode::AlphaBlend::SRC_0_FIX_DST:
glBlendEquation(GL_FUNC_ADD);
glBlendFuncSeparate(GL_ONE, GL_ONE, GL_ONE, GL_ZERO);
break;
case DrawMode::AlphaBlend::SRC_DST_FIX_DST:
// Cv = (Cs - Cd) * FIX + Cd
// Cs * FIX * 0.5
// Cd * FIX * 0.5
glBlendEquation(GL_FUNC_ADD);
glBlendFuncSeparate(GL_CONSTANT_COLOR, GL_CONSTANT_COLOR, GL_ONE, GL_ZERO);
glBlendColor(0.5, 0.5, 0.5, 0.5);
break;
case DrawMode::AlphaBlend::ZERO_SRC_SRC_DST:
glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE, GL_ONE, GL_ZERO);
glBlendEquation(GL_FUNC_REVERSE_SUBTRACT);
break;
case DrawMode::AlphaBlend::SRC_0_DST_DST:
glBlendFunc(GL_DST_ALPHA, GL_ONE);
glBlendEquation(GL_FUNC_ADD);
double_draw.color_mult = 0.5f;
break;
default:
ASSERT(false);
}
} else {
should_enable_blend = false;
}
if (should_enable_blend) {
glEnable(GL_BLEND);
} else {
glDisable(GL_BLEND);
}
if (mode.get_clamp_s_enable()) {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
} else {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
}
if (mode.get_clamp_t_enable()) {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
} else {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
}
if (mode.get_filt_enable()) {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER,
mipmap ? GL_LINEAR_MIPMAP_LINEAR : GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
} else {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
}
// for some reason, they set atest NEVER + FB_ONLY to disable depth writes
bool alpha_hack_to_disable_z_write = false;
float alpha_min = 0.;
if (mode.get_at_enable()) {
switch (mode.get_alpha_test()) {
case DrawMode::AlphaTest::ALWAYS:
break;
case DrawMode::AlphaTest::GEQUAL:
alpha_min = mode.get_aref() / 127.f;
switch (mode.get_alpha_fail()) {
case GsTest::AlphaFail::KEEP:
// ok, no need for double draw
break;
case GsTest::AlphaFail::FB_ONLY:
if (mode.get_depth_write_enable()) {
// darn, we need to draw twice
double_draw.kind = DoubleDrawKind::AFAIL_NO_DEPTH_WRITE;
double_draw.aref_second = alpha_min;
} else {
alpha_min = 0.f;
}
break;
default:
ASSERT(false);
}
break;
case DrawMode::AlphaTest::NEVER:
if (mode.get_alpha_fail() == GsTest::AlphaFail::FB_ONLY) {
alpha_hack_to_disable_z_write = true;
} else {
ASSERT(false);
}
break;
default:
ASSERT(false);
}
}
if (mode.get_depth_write_enable() && !alpha_hack_to_disable_z_write) {
glDepthMask(GL_TRUE);
} else {
glDepthMask(GL_FALSE);
}
double_draw.aref_first = alpha_min;
return double_draw;
}
DoubleDraw setup_tfrag_shader(SharedRenderState* render_state, DrawMode mode, ShaderId shader) {
auto draw_settings = setup_opengl_from_draw_mode(mode, GL_TEXTURE0, true);
auto sh_id = render_state->shaders[shader].id();
if (auto u_id = glGetUniformLocation(sh_id, "alpha_min"); u_id != -1) {
glUniform1f(u_id, draw_settings.aref_first);
}
if (auto u_id = glGetUniformLocation(sh_id, "alpha_max"); u_id != -1) {
glUniform1f(u_id, 10.f);
}
return draw_settings;
}
void first_tfrag_draw_setup(const TfragRenderSettings& settings,
SharedRenderState* render_state,
ShaderId shader) {
const auto& sh = render_state->shaders[shader];
sh.activate();
auto id = sh.id();
glUniform1i(glGetUniformLocation(id, "gfx_hack_no_tex"), Gfx::g_global_settings.hack_no_tex);
glUniform1i(glGetUniformLocation(id, "decal"), false);
glUniform1i(glGetUniformLocation(id, "tex_T0"), 0);
glUniformMatrix4fv(glGetUniformLocation(id, "camera"), 1, GL_FALSE,
settings.camera.camera[0].data());
glUniform4f(glGetUniformLocation(id, "hvdf_offset"), settings.camera.hvdf_off[0],
settings.camera.hvdf_off[1], settings.camera.hvdf_off[2],
settings.camera.hvdf_off[3]);
glUniform1f(glGetUniformLocation(id, "fog_constant"), settings.camera.fog.x());
glUniform1f(glGetUniformLocation(id, "fog_min"), settings.camera.fog.y());
glUniform1f(glGetUniformLocation(id, "fog_max"), settings.camera.fog.z());
glUniform4f(glGetUniformLocation(id, "fog_color"), render_state->fog_color[0] / 255.f,
render_state->fog_color[1] / 255.f, render_state->fog_color[2] / 255.f,
render_state->fog_intensity / 255);
}
void interp_time_of_day_slow(const math::Vector<s32, 4> itimes[4],
const std::vector<tfrag3::TimeOfDayColor>& in,
math::Vector<u8, 4>* out) {
// Timer interp_timer;
math::Vector4f weights[8];
for (int component = 0; component < 8; component++) {
int quad_idx = component / 2;
int word_off = (component % 2 * 2);
for (int channel = 0; channel < 4; channel++) {
int word = word_off + (channel / 2);
int hw_off = channel % 2;
u32 word_val = itimes[quad_idx][word];
u32 hw_val = hw_off ? (word_val >> 16) : word_val;
hw_val = hw_val & 0xff;
weights[component][channel] = hw_val / 64.f;
}
}
for (size_t color = 0; color < in.size(); color++) {
math::Vector4f result = math::Vector4f::zero();
for (int component = 0; component < 8; component++) {
for (int channel = 0; channel < 4; channel++) {
result[channel] += in[color].rgba[component][channel] * weights[component][channel];
}
// result += in[color].rgba[component].cast<float>() * weights[component];
}
result[0] = std::min(result[0], 255.f);
result[1] = std::min(result[1], 255.f);
result[2] = std::min(result[2], 255.f);
result[3] = std::min(result[3], 128.f); // note: different for alpha!
out[color] = result.cast<u8>();
}
}
// we want to absolutely minimize the number of time we have to "cross lanes" in AVX (meaning X
// component of one vector interacts with Y component of another). We can make this a lot better by
// taking groups of 4 time of day colors (each containing 8x RGBAs) and rearranging them with this
// pattern. We want to compute:
// [rgba][0][0] * weights[0] + [rgba][0][1] * weights[1] + [rgba][0][2]... + rgba[0][7] * weights[7]
// RGBA is already a vector of 4 components, but with AVX we have vectors with 32 bytes which fit
// 16 colors in them.
// This makes each vector have:
// colors0 = [rgba][0][0], [rgba][1][0], [rgba][2][0], [rgba][3][0]
// colors1 = [rgba][0][1], [rgba][1][1], [rgba][2][1], [rgba][3][1]
// ...
// so we can basically add up the columns (multiplying by weights in between)
// and we'll end up with [final0, final1, final2, final3, final4]
// the swizzle function below rearranges to get this pattern.
// it's not the most efficient way to do it, but it just runs during loading and not on every frame.
SwizzledTimeOfDay swizzle_time_of_day(const std::vector<tfrag3::TimeOfDayColor>& in) {
SwizzledTimeOfDay out;
out.data.resize((in.size() + 3) * 8 * 4);
// we're rearranging per 4 colors (groups of 32 * 4 = 128)
// color (lots of these)
// component (8 of these)
// channel (4 of these, rgba)
for (u32 color_quad = 0; color_quad < (in.size() + 3) / 4; color_quad++) {
u8* quad_out = out.data.data() + color_quad * 128;
for (u32 component = 0; component < 8; component++) {
for (u32 color = 0; color < 4; color++) {
for (u32 channel = 0; channel < 4; channel++) {
size_t in_idx = color_quad * 4 + color;
if (in_idx < in.size()) {
*quad_out = in.at(color_quad * 4 + color).rgba[component][channel];
} else {
*quad_out = 0;
}
quad_out++;
}
}
}
}
out.color_count = (in.size() + 3) & (~3);
return out;
}
#ifndef __aarch64__
void interp_time_of_day_fast(const math::Vector<s32, 4> itimes[4],
const SwizzledTimeOfDay& swizzled_colors,
math::Vector<u8, 4>* out) {
math::Vector<u16, 4> weights[8];
for (int component = 0; component < 8; component++) {
int quad_idx = component / 2;
int word_off = (component % 2 * 2);
for (int channel = 0; channel < 4; channel++) {
int word = word_off + (channel / 2);
int hw_off = channel % 2;
u32 word_val = itimes[quad_idx][word];
u32 hw_val = hw_off ? (word_val >> 16) : word_val;
hw_val = hw_val & 0xff;
weights[component][channel] = hw_val;
}
}
// weight multipliers
__m128i weights0 = _mm_setr_epi16(weights[0][0], weights[0][1], weights[0][2], weights[0][3],
weights[0][0], weights[0][1], weights[0][2], weights[0][3]);
__m128i weights1 = _mm_setr_epi16(weights[1][0], weights[1][1], weights[1][2], weights[1][3],
weights[1][0], weights[1][1], weights[1][2], weights[1][3]);
__m128i weights2 = _mm_setr_epi16(weights[2][0], weights[2][1], weights[2][2], weights[2][3],
weights[2][0], weights[2][1], weights[2][2], weights[2][3]);
__m128i weights3 = _mm_setr_epi16(weights[3][0], weights[3][1], weights[3][2], weights[3][3],
weights[3][0], weights[3][1], weights[3][2], weights[3][3]);
__m128i weights4 = _mm_setr_epi16(weights[4][0], weights[4][1], weights[4][2], weights[4][3],
weights[4][0], weights[4][1], weights[4][2], weights[4][3]);
__m128i weights5 = _mm_setr_epi16(weights[5][0], weights[5][1], weights[5][2], weights[5][3],
weights[5][0], weights[5][1], weights[5][2], weights[5][3]);
__m128i weights6 = _mm_setr_epi16(weights[6][0], weights[6][1], weights[6][2], weights[6][3],
weights[6][0], weights[6][1], weights[6][2], weights[6][3]);
__m128i weights7 = _mm_setr_epi16(weights[7][0], weights[7][1], weights[7][2], weights[7][3],
weights[7][0], weights[7][1], weights[7][2], weights[7][3]);
// saturation: note that alpha is saturated to 128 but the rest are 255.
// TODO: maybe we should saturate to 255 for everybody (can do this using a single packus) and
// change the shader to deal with this.
__m128i sat = _mm_set_epi16(128, 255, 255, 255, 128, 255, 255, 255);
for (u32 color_quad = 0; color_quad < swizzled_colors.color_count / 4; color_quad++) {
// first, load colors. We put 16 bytes / register and don't touch the upper half because we
// convert u8s to u16s.
{
const u8* base = swizzled_colors.data.data() + color_quad * 128;
__m128i color0_p = _mm_loadu_si64((const __m128i*)(base + 0));
__m128i color1_p = _mm_loadu_si64((const __m128i*)(base + 16));
__m128i color2_p = _mm_loadu_si64((const __m128i*)(base + 32));
__m128i color3_p = _mm_loadu_si64((const __m128i*)(base + 48));
__m128i color4_p = _mm_loadu_si64((const __m128i*)(base + 64));
__m128i color5_p = _mm_loadu_si64((const __m128i*)(base + 80));
__m128i color6_p = _mm_loadu_si64((const __m128i*)(base + 96));
__m128i color7_p = _mm_loadu_si64((const __m128i*)(base + 112));
// unpack to 16-bits. each has 16x 16 bit colors.
__m128i color0 = _mm_cvtepu8_epi16(color0_p);
__m128i color1 = _mm_cvtepu8_epi16(color1_p);
__m128i color2 = _mm_cvtepu8_epi16(color2_p);
__m128i color3 = _mm_cvtepu8_epi16(color3_p);
__m128i color4 = _mm_cvtepu8_epi16(color4_p);
__m128i color5 = _mm_cvtepu8_epi16(color5_p);
__m128i color6 = _mm_cvtepu8_epi16(color6_p);
__m128i color7 = _mm_cvtepu8_epi16(color7_p);
// multiply by weights
color0 = _mm_mullo_epi16(color0, weights0);
color1 = _mm_mullo_epi16(color1, weights1);
color2 = _mm_mullo_epi16(color2, weights2);
color3 = _mm_mullo_epi16(color3, weights3);
color4 = _mm_mullo_epi16(color4, weights4);
color5 = _mm_mullo_epi16(color5, weights5);
color6 = _mm_mullo_epi16(color6, weights6);
color7 = _mm_mullo_epi16(color7, weights7);
// add. This order minimizes dependencies.
color0 = _mm_adds_epi16(color0, color1);
color2 = _mm_adds_epi16(color2, color3);
color4 = _mm_adds_epi16(color4, color5);
color6 = _mm_adds_epi16(color6, color7);
color0 = _mm_adds_epi16(color0, color2);
color4 = _mm_adds_epi16(color4, color6);
color0 = _mm_adds_epi16(color0, color4);
// divide, because we multiplied our weights by 2^7.
color0 = _mm_srli_epi16(color0, 6);
// saturate
color0 = _mm_min_epu16(sat, color0);
// back to u8s.
auto result = _mm_packus_epi16(color0, color0);
// store result
_mm_storel_epi64((__m128i*)(&out[color_quad * 4]), result);
}
{
const u8* base = swizzled_colors.data.data() + color_quad * 128 + 8;
__m128i color0_p = _mm_loadu_si64((const __m128i*)(base + 0));
__m128i color1_p = _mm_loadu_si64((const __m128i*)(base + 16));
__m128i color2_p = _mm_loadu_si64((const __m128i*)(base + 32));
__m128i color3_p = _mm_loadu_si64((const __m128i*)(base + 48));
__m128i color4_p = _mm_loadu_si64((const __m128i*)(base + 64));
__m128i color5_p = _mm_loadu_si64((const __m128i*)(base + 80));
__m128i color6_p = _mm_loadu_si64((const __m128i*)(base + 96));
__m128i color7_p = _mm_loadu_si64((const __m128i*)(base + 112));
// unpack to 16-bits. each has 16x 16 bit colors.
__m128i color0 = _mm_cvtepu8_epi16(color0_p);
__m128i color1 = _mm_cvtepu8_epi16(color1_p);
__m128i color2 = _mm_cvtepu8_epi16(color2_p);
__m128i color3 = _mm_cvtepu8_epi16(color3_p);
__m128i color4 = _mm_cvtepu8_epi16(color4_p);
__m128i color5 = _mm_cvtepu8_epi16(color5_p);
__m128i color6 = _mm_cvtepu8_epi16(color6_p);
__m128i color7 = _mm_cvtepu8_epi16(color7_p);
// multiply by weights
color0 = _mm_mullo_epi16(color0, weights0);
color1 = _mm_mullo_epi16(color1, weights1);
color2 = _mm_mullo_epi16(color2, weights2);
color3 = _mm_mullo_epi16(color3, weights3);
color4 = _mm_mullo_epi16(color4, weights4);
color5 = _mm_mullo_epi16(color5, weights5);
color6 = _mm_mullo_epi16(color6, weights6);
color7 = _mm_mullo_epi16(color7, weights7);
// add. This order minimizes dependencies.
color0 = _mm_adds_epi16(color0, color1);
color2 = _mm_adds_epi16(color2, color3);
color4 = _mm_adds_epi16(color4, color5);
color6 = _mm_adds_epi16(color6, color7);
color0 = _mm_adds_epi16(color0, color2);
color4 = _mm_adds_epi16(color4, color6);
color0 = _mm_adds_epi16(color0, color4);
// divide, because we multiplied our weights by 2^7.
color0 = _mm_srli_epi16(color0, 6);
// saturate
color0 = _mm_min_epu16(sat, color0);
// back to u8s.
auto result = _mm_packus_epi16(color0, color0);
// store result
_mm_storel_epi64((__m128i*)(&out[color_quad * 4 + 2]), result);
}
}
}
#endif
bool sphere_in_view_ref(const math::Vector4f& sphere, const math::Vector4f* planes) {
math::Vector4f acc =
planes[0] * sphere.x() + planes[1] * sphere.y() + planes[2] * sphere.z() - planes[3];
return acc.x() > -sphere.w() && acc.y() > -sphere.w() && acc.z() > -sphere.w() &&
acc.w() > -sphere.w();
}
// this isn't super efficient, but we spend so little time here it's not worth it to go faster.
void cull_check_all_slow(const math::Vector4f* planes,
const std::vector<tfrag3::VisNode>& nodes,
const u8* level_occlusion_string,
u8* out) {
if (level_occlusion_string) {
for (size_t i = 0; i < nodes.size(); i++) {
u16 my_id = nodes[i].my_id;
bool not_occluded =
my_id != 0xffff && level_occlusion_string[my_id / 8] & (1 << (7 - (my_id & 7)));
out[i] = not_occluded && sphere_in_view_ref(nodes[i].bsphere, planes);
}
} else {
for (size_t i = 0; i < nodes.size(); i++) {
out[i] = sphere_in_view_ref(nodes[i].bsphere, planes);
}
}
}
void make_all_visible_multidraws(std::pair<int, int>* draw_ptrs_out,
GLsizei* counts_out,
void** index_offsets_out,
const std::vector<tfrag3::ShrubDraw>& draws) {
u64 md_idx = 0;
for (size_t i = 0; i < draws.size(); i++) {
const auto& draw = draws[i];
u64 iidx = draw.first_index_index;
std::pair<int, int> ds;
ds.first = md_idx;
ds.second = 1;
counts_out[md_idx] = draw.num_indices;
index_offsets_out[md_idx] = (void*)(iidx * sizeof(u32));
md_idx++;
draw_ptrs_out[i] = ds;
}
}
u32 make_all_visible_multidraws(std::pair<int, int>* draw_ptrs_out,
GLsizei* counts_out,
void** index_offsets_out,
const std::vector<tfrag3::StripDraw>& draws) {
u64 md_idx = 0;
u32 num_tris = 0;
for (size_t i = 0; i < draws.size(); i++) {
const auto& draw = draws[i];
u64 iidx = draw.unpacked.idx_of_first_idx_in_full_buffer;
std::pair<int, int> ds;
ds.first = md_idx;
ds.second = 1;
int num_inds = 0;
for (auto& grp : draw.vis_groups) {
num_tris += grp.num_tris;
num_inds += grp.num_inds;
}
counts_out[md_idx] = num_inds;
index_offsets_out[md_idx] = (void*)(iidx * sizeof(u32));
draw_ptrs_out[i] = ds;
md_idx++;
}
return num_tris;
}
u32 make_all_visible_index_list(std::pair<int, int>* group_out,
u32* idx_out,
const std::vector<tfrag3::ShrubDraw>& draws,
const u32* idx_in) {
int idx_buffer_ptr = 0;
for (size_t i = 0; i < draws.size(); i++) {
const auto& draw = draws[i];
std::pair<int, int> ds;
ds.first = idx_buffer_ptr;
memcpy(&idx_out[idx_buffer_ptr], idx_in + draw.first_index_index,
draw.num_indices * sizeof(u32));
idx_buffer_ptr += draw.num_indices;
ds.second = idx_buffer_ptr - ds.first;
group_out[i] = ds;
}
return idx_buffer_ptr;
}
u32 make_multidraws_from_vis_string(std::pair<int, int>* draw_ptrs_out,
GLsizei* counts_out,
void** index_offsets_out,
const std::vector<tfrag3::StripDraw>& draws,
const std::vector<u8>& vis_data) {
u64 md_idx = 0;
u32 num_tris = 0;
u32 sanity_check = 0;
for (size_t i = 0; i < draws.size(); i++) {
const auto& draw = draws[i];
u64 iidx = draw.unpacked.idx_of_first_idx_in_full_buffer;
ASSERT(sanity_check == iidx);
std::pair<int, int> ds;
ds.first = md_idx;
ds.second = 0;
bool building_run = false;
u64 run_start = 0;
for (auto& grp : draw.vis_groups) {
sanity_check += grp.num_inds;
bool vis = grp.vis_idx_in_pc_bvh == UINT16_MAX || vis_data[grp.vis_idx_in_pc_bvh];
if (vis) {
num_tris += grp.num_tris;
}
if (building_run) {
if (!vis) {
building_run = false;
counts_out[md_idx] = iidx - run_start;
index_offsets_out[md_idx] = (void*)(run_start * sizeof(u32));
ds.second++;
md_idx++;
}
} else {
if (vis) {
building_run = true;
run_start = iidx;
}
}
iidx += grp.num_inds;
}
if (building_run) {
building_run = false;
counts_out[md_idx] = iidx - run_start;
index_offsets_out[md_idx] = (void*)(run_start * sizeof(u32));
ds.second++;
md_idx++;
}
draw_ptrs_out[i] = ds;
}
return num_tris;
}
u32 make_multidraws_from_vis_and_proto_string(std::pair<int, int>* draw_ptrs_out,
GLsizei* counts_out,
void** index_offsets_out,
const std::vector<tfrag3::StripDraw>& draws,
const std::vector<u8>& vis_data,
const std::vector<u8>& proto_vis_data) {
u64 md_idx = 0;
u32 num_tris = 0;
u32 sanity_check = 0;
for (size_t i = 0; i < draws.size(); i++) {
const auto& draw = draws[i];
u64 iidx = draw.unpacked.idx_of_first_idx_in_full_buffer;
ASSERT(sanity_check == iidx);
std::pair<int, int> ds;
ds.first = md_idx;
ds.second = 0;
bool building_run = false;
u64 run_start = 0;
for (auto& grp : draw.vis_groups) {
sanity_check += grp.num_inds;
bool vis = (grp.vis_idx_in_pc_bvh == UINT16_MAX || vis_data[grp.vis_idx_in_pc_bvh]) &&
proto_vis_data[grp.tie_proto_idx];
if (vis) {
num_tris += grp.num_tris;
}
if (building_run) {
if (!vis) {
building_run = false;
counts_out[md_idx] = iidx - run_start;
index_offsets_out[md_idx] = (void*)(run_start * sizeof(u32));
ds.second++;
md_idx++;
}
} else {
if (vis) {
building_run = true;
run_start = iidx;
}
}
iidx += grp.num_inds;
}
if (building_run) {
building_run = false;
counts_out[md_idx] = iidx - run_start;
index_offsets_out[md_idx] = (void*)(run_start * sizeof(u32));
ds.second++;
md_idx++;
}
draw_ptrs_out[i] = ds;
}
return num_tris;
}
u32 make_index_list_from_vis_string(std::pair<int, int>* group_out,
u32* idx_out,
const std::vector<tfrag3::StripDraw>& draws,
const std::vector<u8>& vis_data,
const u32* idx_in,
u32* num_tris_out) {
int idx_buffer_ptr = 0;
u32 num_tris = 0;
for (size_t i = 0; i < draws.size(); i++) {
const auto& draw = draws[i];
int vtx_idx = 0;
std::pair<int, int> ds;
ds.first = idx_buffer_ptr;
bool building_run = false;
int run_start_out = 0;
int run_start_in = 0;
for (auto& grp : draw.vis_groups) {
bool vis = grp.vis_idx_in_pc_bvh == UINT16_MAX || vis_data[grp.vis_idx_in_pc_bvh];
if (vis) {
num_tris += grp.num_tris;
}
if (building_run) {
if (vis) {
idx_buffer_ptr += grp.num_inds;
} else {
building_run = false;
memcpy(&idx_out[run_start_out],
idx_in + draw.unpacked.idx_of_first_idx_in_full_buffer + run_start_in,
(idx_buffer_ptr - run_start_out) * sizeof(u32));
}
} else {
if (vis) {
building_run = true;
run_start_out = idx_buffer_ptr;
run_start_in = vtx_idx;
idx_buffer_ptr += grp.num_inds;
}
}
vtx_idx += grp.num_inds;
}
if (building_run) {
memcpy(&idx_out[run_start_out],
idx_in + draw.unpacked.idx_of_first_idx_in_full_buffer + run_start_in,
(idx_buffer_ptr - run_start_out) * sizeof(u32));
}
ds.second = idx_buffer_ptr - ds.first;
group_out[i] = ds;
}
*num_tris_out = num_tris;
return idx_buffer_ptr;
}
u32 make_index_list_from_vis_and_proto_string(std::pair<int, int>* group_out,
u32* idx_out,
const std::vector<tfrag3::StripDraw>& draws,
const std::vector<u8>& vis_data,
const std::vector<u8>& proto_vis_data,
const u32* idx_in,
u32* num_tris_out) {
int idx_buffer_ptr = 0;
u32 num_tris = 0;
for (size_t i = 0; i < draws.size(); i++) {
const auto& draw = draws[i];
int vtx_idx = 0;
std::pair<int, int> ds;
ds.first = idx_buffer_ptr;
bool building_run = false;
int run_start_out = 0;
int run_start_in = 0;
for (auto& grp : draw.vis_groups) {
bool vis = (grp.vis_idx_in_pc_bvh == UINT16_MAX || vis_data[grp.vis_idx_in_pc_bvh]) &&
proto_vis_data[grp.tie_proto_idx];
if (vis) {
num_tris += grp.num_tris;
}
if (building_run) {
if (vis) {
idx_buffer_ptr += grp.num_inds;
} else {
building_run = false;
memcpy(&idx_out[run_start_out],
idx_in + draw.unpacked.idx_of_first_idx_in_full_buffer + run_start_in,
(idx_buffer_ptr - run_start_out) * sizeof(u32));
}
} else {
if (vis) {
building_run = true;
run_start_out = idx_buffer_ptr;
run_start_in = vtx_idx;
idx_buffer_ptr += grp.num_inds;
}
}
vtx_idx += grp.num_inds;
}
if (building_run) {
memcpy(&idx_out[run_start_out],
idx_in + draw.unpacked.idx_of_first_idx_in_full_buffer + run_start_in,
(idx_buffer_ptr - run_start_out) * sizeof(u32));
}
ds.second = idx_buffer_ptr - ds.first;
group_out[i] = ds;
}
*num_tris_out = num_tris;
return idx_buffer_ptr;
}
u32 make_all_visible_index_list(std::pair<int, int>* group_out,
u32* idx_out,
const std::vector<tfrag3::StripDraw>& draws,
const u32* idx_in,
u32* num_tris_out) {
int idx_buffer_ptr = 0;
u32 num_tris = 0;
for (size_t i = 0; i < draws.size(); i++) {
const auto& draw = draws[i];
std::pair<int, int> ds;
ds.first = idx_buffer_ptr;
u32 num_inds = 0;
for (auto& grp : draw.vis_groups) {
num_inds += grp.num_inds;
num_tris += grp.num_tris;
}
memcpy(&idx_out[idx_buffer_ptr], idx_in + draw.unpacked.idx_of_first_idx_in_full_buffer,
num_inds * sizeof(u32));
idx_buffer_ptr += num_inds;
ds.second = idx_buffer_ptr - ds.first;
group_out[i] = ds;
}
*num_tris_out = num_tris;
return idx_buffer_ptr;
}
void update_render_state_from_pc_settings(SharedRenderState* state, const TfragPcPortData& data) {
if (!state->has_pc_data) {
for (int i = 0; i < 4; i++) {
state->camera_planes[i] = data.camera.planes[i];
state->camera_matrix[i] = data.camera.camera[i];
}
state->camera_pos = data.camera.trans;
state->camera_hvdf_off = data.camera.hvdf_off;
state->camera_fog = data.camera.fog;
state->has_pc_data = true;
}
}