// Copyright Epic Games, Inc. All Rights Reserved.

#include "VectorVMBackend.h"
#include "ShaderFormatVectorVM.h"
#include "hlslcc.h"
#include "hlslcc_private.h"
#include "compiler.h"

PRAGMA_DISABLE_SHADOW_VARIABLE_WARNINGS
#include "glsl_parser_extras.h"
PRAGMA_ENABLE_SHADOW_VARIABLE_WARNINGS

#include "hash_table.h"
#include "ir_rvalue_visitor.h"
#include "PackUniformBuffers.h"
#include "IRDump.h"
#include "OptValueNumbering.h"
#include "ir_optimization.h"
#include "ir_expression_flattening.h"
#include "ir.h"

#include "VectorVM.h"

#include "HAL/IConsoleManager.h"

#include "Stats/Stats.h"

DECLARE_STATS_GROUP(TEXT("VectorVMBackend"), STATGROUP_VectorVMBackend, STATCAT_Advanced);

DECLARE_CYCLE_STAT(TEXT("Generate Main"), STAT_VVMGenerateMain, STATGROUP_VectorVMBackend);
DECLARE_CYCLE_STAT(TEXT("Generate Code"), STAT_VVMGenerateCode, STATGROUP_VectorVMBackend);

DECLARE_CYCLE_STAT(TEXT("Initial/Misc"), STAT_VVMInitMisc, STATGROUP_VectorVMBackend);
DECLARE_CYCLE_STAT(TEXT("Matrices To Vectors"), STAT_VVMMatToVec, STATGROUP_VectorVMBackend);
DECLARE_CYCLE_STAT(TEXT("Branches To Selects"), STAT_VVMBranchesToSelects, STATGROUP_VectorVMBackend);
DECLARE_CYCLE_STAT(TEXT("To Single Op"), STAT_VVMToSingleOp, STATGROUP_VectorVMBackend);
DECLARE_CYCLE_STAT(TEXT("Scalarize"), STAT_VVMScalarize, STATGROUP_VectorVMBackend);
DECLARE_CYCLE_STAT(TEXT("Merge Ops"), STAT_VVMMergeOps, STATGROUP_VectorVMBackend);
DECLARE_CYCLE_STAT(TEXT("Propagate non-expressions"), STAT_VVMPropagateNonExprs, STATGROUP_VectorVMBackend);
DECLARE_CYCLE_STAT(TEXT("Cleanup"), STAT_VVMCleanup, STATGROUP_VectorVMBackend);
DECLARE_CYCLE_STAT(TEXT("Validate"), STAT_VVMValidate, STATGROUP_VectorVMBackend);
DECLARE_CYCLE_STAT(TEXT("Gen Bytecode"), STAT_VVMGenByteCode, STATGROUP_VectorVMBackend);

#if !PLATFORM_WINDOWS
#define _strdup strdup
#endif

DEFINE_LOG_CATEGORY(LogVVMBackend);

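// hlslcc backend hook for "main" generation. The VectorVM backend does not
// emit a wrapper main; it finds the HLSL entry point function "SimulateMain"
// and flags all of its signatures as 'main' so the later IR passes and the
// bytecode generator treat it as the VM entry point.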
bool FVectorVMCodeBackend::GenerateMain(EHlslShaderFrequency Frequency, const char* EntryPoint, exec_list* Instructions, _mesa_glsl_parse_state* ParseState)
{
	SCOPE_CYCLE_COUNTER(STAT_VVMGenerateMain);
	//vm_debug_dump(Instructions, ParseState);

	ir_function_signature* MainSig = nullptr;
	int NumFunctions = 0;

	foreach_iter(exec_list_iterator, iter, *Instructions)
	{
		ir_instruction* ir = (ir_instruction*)iter.get();
		ir_function* Function = ir->as_function();
		if (Function)
		{
			if (strcmp(Function->name, "SimulateMain") == 0)
			{
				foreach_iter(exec_list_iterator, sigiter, *Function)
				{
					ir_function_signature* Sig = (ir_function_signature*)sigiter.get();
					Sig->is_main = true;
				}
			}
		}
	}

	//vm_debug_dump(Instructions, ParseState);
	return true;
}

/** Finds all external function calls (those with no body) and makes them builtin. This bypasses issues with the constant propagation visitor. */
class ir_make_external_funcs_builtin : public ir_hierarchical_visitor
{
	_mesa_glsl_parse_state* parse_state;
	ir_make_external_funcs_builtin(_mesa_glsl_parse_state* in_state) : parse_state(in_state) { }
	virtual ~ir_make_external_funcs_builtin() { }

	virtual ir_visitor_status visit_enter(ir_function_signature* sig)
	{
		if (sig->body.get_head() == nullptr)
		{
			sig->is_builtin = true;
		}
		return visit_continue;
	}

public:
	static void run(exec_list* ir, _mesa_glsl_parse_state* state)
	{
		ir_make_external_funcs_builtin visitor(state);
		visit_list_elements(&visitor, ir);
	}
};

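// The visitor below tracks scopes with an explicit stack: each EnterStatScope
// call pushes the call plus a zeroed assignment counter, and the matching
// ExitStatScope pops them. Any assignment (or other call) seen while inside a
// scope bumps the innermost counter; a scope whose counter is still zero on
// exit has its Enter/Exit call pair removed from the IR.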
/** Removes any stat scopes that are now empty due to other optimizations. */
class ir_remove_empty_stat_scopes : public ir_hierarchical_visitor
{
	_mesa_glsl_parse_state* parse_state;
	ir_remove_empty_stat_scopes(_mesa_glsl_parse_state* in_state)
		: parse_state(in_state)
		, progress(false)
	{
	}
	virtual ~ir_remove_empty_stat_scopes() { }

	TArray<ir_call*> EnterStatScopeCalls;
	TArray<int32> StatScopeAssignmentCounts;

	bool progress;

	virtual ir_visitor_status visit_enter(ir_call* call)
	{
		if (strcmp(call->callee_name(), "EnterStatScope") == 0)
		{
			EnterStatScopeCalls.Push(call);
			StatScopeAssignmentCounts.Push(0);
		}
		else if (strcmp(call->callee_name(), "ExitStatScope") == 0)
		{
			check(EnterStatScopeCalls.Num() == StatScopeAssignmentCounts.Num());
			if (EnterStatScopeCalls.Num() == 0)
			{
				_mesa_glsl_error(parse_state, "Mismatched EnterStatScope/ExitStatScope calls.");
				return visit_stop;
			}

			ir_call* EnterCall = EnterStatScopeCalls.Pop();
			int32 NumAssignments = StatScopeAssignmentCounts.Pop();
			if (NumAssignments == 0)
			{
				EnterCall->remove();
				call->remove();
				progress = true;
			}
			else if (StatScopeAssignmentCounts.Num() > 0)
			{
				// On leaving a non-empty scope, add its assignments to the outer scope's count.
				// Without this, nested scopes could be removed even though they are not empty.
				StatScopeAssignmentCounts.Last() += NumAssignments;
			}
		}
		else if (StatScopeAssignmentCounts.Num() > 0)
		{
			++StatScopeAssignmentCounts.Last();
		}
		return visit_continue_with_parent;
	}

	virtual ir_visitor_status visit_enter(ir_assignment* assign)
	{
		if (StatScopeAssignmentCounts.Num() > 0)
		{
			++StatScopeAssignmentCounts.Last();
		}
		return visit_continue_with_parent;
	}

public:

	static void run(exec_list* ir, _mesa_glsl_parse_state* state)
	{
		ir_remove_empty_stat_scopes visitor(state);
		do
		{
			visitor.progress = false;
			visit_list_elements(&visitor, ir);
		} while (visitor.progress);
	}
};

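// Backend code generation. Rather than producing a source string, this runs a
// pipeline of IR lowering passes that reduce the program to straight-line,
// single-operation scalar statements, then emits VectorVM bytecode directly
// into CompilationOutput.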
char* FVectorVMCodeBackend::GenerateCode(exec_list* ir, _mesa_glsl_parse_state* state, EHlslShaderFrequency Frequency)
{
	SCOPE_CYCLE_COUNTER(STAT_VVMGenerateCode);
	vm_debug_print("========VECTOR VM BACKEND: Generate Code==============\n");
	vm_debug_dump(ir, state);

	if (state->error) return nullptr;

	bool progress = false;

	ir_make_external_funcs_builtin::run(ir, state);

	FlattenUniformBufferStructures(ir, state);

	// The vector VM doesn't support dynamic loops, and the codegen panics if it sees loops. We'll just set the unroll threshold
	// to a very large value, to make sure loops are always unrolled.
	state->maxunrollcount = 16384;

	state->conservative_propagation = false;
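	// First lowering loop: break matrix ops down to vector ops and vector ops
	// down to scalars where needed, then re-run the generic optimization pass,
	// repeating until the IR stops changing.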
	{
		SCOPE_CYCLE_COUNTER(STAT_VVMInitMisc);
		vm_debug_print("== Initial misc ==\n");
		do
		{
			//progress = do_function_inlining(ir); //Full optimization pass earlier will have already done this.
			progress = do_mat_op_to_vec(ir);
			progress = do_vec_op_to_scalar(ir, state) || progress;
			progress = do_vec_index_to_swizzle(ir) || progress;

			// slip in this call to extract scalar constants out of swizzled constant vectors
			progress = vm_extract_scalar_ops(ir, state) || progress;

			progress = do_optimization_pass(ir, state, true) || progress;
			vm_debug_print("======== Optimization Pass ==============\n");
			vm_debug_dump(ir, state);
		} while (progress);
		//validate_ir_tree(ir, state);
		if (state->error) return nullptr;
	}

	{
		SCOPE_CYCLE_COUNTER(STAT_VVMMatToVec);
		// Near-complete pass that removes all matrices from the code and replaces them with swizzled vectors.
		// For now the visitors below can handle matrices, but we may hit edge cases in future that require their removal.
		vm_debug_print("== matrices to vectors ==\n");
		vm_matrices_to_vectors(ir, state);
		vm_debug_dump(ir, state);
		if (state->error) return nullptr;
	}

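	// The VM runs straight-line bytecode over all instances in lockstep, with
	// no divergent flow control, so conditional branches are flattened into
	// select operations that evaluate both sides and pick results.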
	{
		SCOPE_CYCLE_COUNTER(STAT_VVMBranchesToSelects);
		vm_debug_print("== Branches to selects ==\n");
		vm_flatten_branches_to_selects(ir, state);
		//validate_ir_tree(ir, state);
		vm_debug_dump(ir, state);
	}

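	// Split compound expressions so that each statement performs a single
	// operation, matching the one-op-per-instruction shape of the VM bytecode.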
	{
		SCOPE_CYCLE_COUNTER(STAT_VVMToSingleOp);
		vm_debug_print("== To Single Op ==\n");
		vm_to_single_op(ir, state);
		//validate_ir_tree(ir, state);
		vm_debug_dump(ir, state);
		if (state->error) return nullptr;
	}

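	// Break the remaining vector operations into per-component scalar
	// operations; at this level the VM's registers and ops are scalar.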
	{
		SCOPE_CYCLE_COUNTER(STAT_VVMScalarize);
		vm_debug_print("== Scalarize ==\n");
		vm_scalarize_ops(ir, state);
		//validate_ir_tree(ir, state);
		vm_debug_dump(ir, state);
		//validate_ir_tree(ir, state);
		if (state->error) return nullptr;
	}

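	// Run algebraic simplification to a fixed point on the now-scalar IR.
	// Note: this block reuses the Scalarize cycle stat rather than having one of its own.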
	{
		SCOPE_CYCLE_COUNTER(STAT_VVMScalarize);
		vm_debug_print("== Scalar Optimization ==\n");
		progress = false;
		do
		{
			progress = do_algebraic(state, ir);
		} while (progress);
		vm_debug_dump(ir, state);
		if (state->error) return nullptr;
	}
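	// Combine compatible operations into single VM ops where possible
	// (see vm_merge_ops for the exact patterns it fuses).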
	{
		SCOPE_CYCLE_COUNTER(STAT_VVMMergeOps);
		vm_debug_print("== Merge Ops ==\n");
		vm_merge_ops(ir, state);
		//validate_ir_tree(ir, state);
		vm_debug_dump(ir, state);
		//validate_ir_tree(ir, state);
		if (state->error) return nullptr;
	}

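	// Propagate non-expression values (e.g. plain variable references and
	// constants) into their use sites; see vm_propagate_non_expressions_visitor.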
	{
		SCOPE_CYCLE_COUNTER(STAT_VVMPropagateNonExprs);
		vm_debug_print("== Propagate non-expressions ==\n");
		vm_propagate_non_expressions_visitor(ir, state);
		//validate_ir_tree(ir, state);
		vm_debug_dump(ir, state);
		if (state->error) return nullptr;
	}

	{
		//SCOPE_CYCLE_COUNTER(STAT_VVMCleanup);
		vm_debug_print("== Remove self ref assignments ==\n");
		vm_remove_self_ref_assignments(ir, state);
		vm_debug_dump(ir, state);
		if (state->error) return nullptr;
	}

	{
		SCOPE_CYCLE_COUNTER(STAT_VVMCleanup);
		vm_debug_print("== Cleanup ==\n");
		// Final cleanup
		progress = false;
		do
		{
			progress = do_dead_code(ir, false);
			progress = do_dead_code_local(ir) || progress;
			progress = do_swizzle_swizzle(ir) || progress;
			progress = do_noop_swizzle(ir) || progress;
			progress = do_copy_propagation(ir) || progress;
			progress = do_copy_propagation_elements(ir) || progress;
			progress = do_constant_propagation(ir) || progress;
		} while (progress);

		ir_remove_empty_stat_scopes::run(ir, state);
		vm_debug_dump(ir, state);
	}

	{
		SCOPE_CYCLE_COUNTER(STAT_VVMValidate);
		validate_ir_tree(ir, state);
	}

	if (state->error) return nullptr;

	{
		SCOPE_CYCLE_COUNTER(STAT_VVMGenByteCode);
		vm_gen_bytecode(ir, state, CompilationOutput);
	}

	// We cheat and emit the bytecode into CompilationOutput instead of returning it.
	// The return value here is treated as a string, so the zero bytes the bytecode contains would prove problematic.
	return nullptr;
}

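// Registers the VM-specific intrinsic functions with the parser so that calls
// to them in the HLSL source type-check. They have no bodies; they are treated
// as builtins and mapped to VM operations during later lowering.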
void FVectorVMLanguageSpec::SetupLanguageIntrinsics(_mesa_glsl_parse_state* State, exec_list* ir)
{
	//TODO: Need to add a way of stopping these being stripped if they're not used in the code.
	//We're fine if the func is unused entirely, but we need to keep the scalar signatures for when we scalarize the call.
	//Maybe we can just keep the wrong sig but still replace the ret val and params?
	//make_intrinsic_genType(ir, State, "mad", ir_invalid_opcode, IR_INTRINSIC_FLOAT, 3, 1, 4);
	make_intrinsic_genType(ir, State, "rand", ir_invalid_opcode, IR_INTRINSIC_FLOAT, 1, 1, 4);
	make_intrinsic_genType(ir, State, "rand", ir_invalid_opcode, IR_INTRINSIC_INT, 1, 1, 4);
	make_intrinsic_genType(ir, State, "Modulo", ir_invalid_opcode, IR_INTRINSIC_FLOAT, 1, 1, 4);

	make_intrinsic_genType(ir, State, "EnterStatScope", ir_invalid_opcode, IR_INTRINSIC_RETURNS_VOID | IR_INTRINSIC_INT, 1, 1, 1);
	make_intrinsic_genType(ir, State, "ExitStatScope", ir_invalid_opcode, IR_INTRINSIC_RETURNS_VOID, 0, 0, 0);

	//Don't need all of these, as we're only using the basic scalar functions, whose signatures we provide in the .usf.
	//make_intrinsic_genType(ir, State, "InputDataFloat", ir_invalid_opcode, IR_INTRINSIC_FLOAT, 2, 1, 1);
	//make_intrinsic_genType(ir, State, "InputDataInt", ir_invalid_opcode, IR_INTRINSIC_INT, 2, 1, 1);
	//make_intrinsic_genType(ir, State, "OutputDataFloat", ir_invalid_opcode, 0, 3, 1, 1);
	//make_intrinsic_genType(ir, State, "OutputDataInt", ir_invalid_opcode, 0, 3, 1, 1);
	//make_intrinsic_genType(ir, State, "AcquireIndex", ir_invalid_opcode, IR_INTRINSIC_INT, 2, 1, 1);
}

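// Console variable gating verbose IR dumps from this backend (see DebugDumpIR below).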
static int32 GbDumpVMIR = 0;
static FAutoConsoleVariableRef CVarDumpVMIR(
	TEXT("fx.DumpVMIR"),
	GbDumpVMIR,
	TEXT("If > 0, verbose logging is enabled for the VM compiler backend.\n"),
	ECVF_Default
);

void DebugDumpIR(struct exec_list* ir, struct _mesa_glsl_parse_state* State)
{
	if (GbDumpVMIR)
	{
		IRDump(ir, State);
	}
}