dotnet performance:
Hi all,
I originally posted this over here:
http://forums.microsoft.com/MSDN/ShowPost.aspx?siteid=1&PostID=3140358 but figured that this might be the better place to ask my question.
I’m experiencing this strange optimization issue in Visual Studio 2008 in
C++/CLI. I’m getting significantly slower code for a very critical function
in a scientific application we are developing, but only when the function is
compiled with optimizations (and is not executed under the debugger).
Here are some times I get with various build/run configurations:
AMD Opteron 256
Generation of 1,000,000,000 random values
Debug Code; Inside Debugger: 6 Seconds 5.8170 Milliseconds
Optimized Code; Inside Debugger: 5 Seconds 953.1670 Milliseconds
Optimized Code; Outside Debugger: 11 Seconds 31.2150 Milliseconds
As you can see the optimized code produces the best performance but only
when running *inside* the debugger (when JIT optimizations are disabled)!
Outside the debugger it is quite slow, even slower than the non-optimized
code.
I’ve included snapshots of the code in question (abbreviated), the IL (both
optimized and non-optimized), and the assembly produced in all three cases.
If someone can shed some light on this problem, I would greatly
appreciate it.
It seems to me from looking at the assembly, that the JIT compiler is adding
some type of exception handler or security code, but I’m not sure or for that
matter why it would be doing this only for optimized code running outside the
debugger. You can see in the optimized assembly language that it is accessing
segment FS, but again, only when JIT optimizations are enabled. The class
and the method contain no exception handlers, thread specific storage or
static constructors.
NOTE: I’ve simplified the code and reduced some of the namespaces for
clarity.
Thanks much,
James C. Papp
*** C++/CLI Code (Simplified) ***
// Declaration of the native routine called by NextUInt() below. It has C
// linkage, uses the __stdcall calling convention, and is declared as
// non-throwing. It takes a pointer to the 32-bit state values; its definition
// is not part of this listing.
extern "C"
{
[SuppressUnmanagedCodeSecurity]
void __declspec(nothrow) __stdcall
StreamingMersenneTwisterRefresh(unsigned __int32 * const pulStates);
}
// Sealed managed class implementing IRandom. Only NextUInt() is shown; the
// remaining members (including the m_paul* pointer fields used below) are
// elided with "..." in this simplified listing.
[SuppressUnmanagedCodeSecurity]
public ref class StreamingMersenneTwisterRandom sealed : public IRandom
{
...
public:
// Returns the next unsigned 32-bit value read through the state pointers.
virtual unsigned __int32 NextUInt()
{
// Fast path: the index has not reached the end sentinel, so return the
// value it points at and advance the index by one element.
if (m_paulIndex != m_paulEndSentinel)
{
return *m_paulIndex++;
}
// Slow path: hand the state table to the native refresh routine.
// NOTE(review): presumably this regenerates the table in place; the
// routine's body is not shown here.
StreamingMersenneTwisterRefresh(m_paulStateTable);
// Move the index back by 623 elements.
// NOTE(review): presumably the sentinel sits 624 elements past the table
// start, so this leaves the index at element 1 -- confirm against the
// elided constructor.
m_paulIndex -= 624 - 1;
// Return the first element of the state table directly.
return *m_paulStateTable;
}
};
*** DEBUG IL ***
.method public hidebysig newslot virtual final instance uint32 NextUInt()
cil managed
{
.maxstack 4
.locals (
[0] uint32 num)
L_0000: ldarg.0
L_0001: ldfld uint32* StreamingMersenneTwisterRandom::m_paulIndex
L_0006: ldarg.0
L_0007: ldfld uint32* modopt([mscorlib]CompilerServices.IsConst)
modopt([mscorlib]CompilerServices.IsConst)
StreamingMersenneTwisterRandom::m_paulEndSentinel
L_000c: beq.s L_0026
L_000e: ldarg.0
L_000f: ldfld uint32* StreamingMersenneTwisterRandom::m_paulIndex
L_0014: ldind.i4
L_0015: ldarg.0
L_0016: dup
L_0017: ldfld uint32* StreamingMersenneTwisterRandom::m_paulIndex
L_001c: ldc.i4.4
L_001d: add
L_001e: stfld uint32* StreamingMersenneTwisterRandom::m_paulIndex
L_0023: stloc.0
L_0024: br.s L_0053
L_0026: ldarg.0
L_0027: dup
L_0028: ldfld uint32* modopt([mscorlib]CompilerServices.IsConst)
modopt([mscorlib]CompilerServices.IsConst)
StreamingMersenneTwisterRandom::m_paulStateTable
L_002d: stfld uint32* StreamingMersenneTwisterRandom::m_paulIndex
L_0032: ldarg.0
L_0033: ldfld uint32* StreamingMersenneTwisterRandom::m_paulIndex
L_0038: call void modopt([mscorlib]CompilerServices.CallConvStdcall)
::StreamingMersenneTwisterRefresh(uint32*
modopt([mscorlib]CompilerServices.IsConst)
modopt([mscorlib]CompilerServices.IsConst))
L_003d: ldarg.0
L_003e: ldfld uint32* StreamingMersenneTwisterRandom::m_paulIndex
L_0043: ldind.i4
L_0044: ldarg.0
L_0045: dup
L_0046: ldfld uint32* StreamingMersenneTwisterRandom::m_paulIndex
L_004b: ldc.i4.4
L_004c: add
L_004d: stfld uint32* StreamingMersenneTwisterRandom::m_paulIndex
L_0052: stloc.0
L_0053: ldloc.0
L_0054: ret
}
*** OPTIMIZED IL ***
.method public hidebysig newslot virtual final instance uint32 NextUInt()
cil managed
{
.maxstack 4
.locals (
[0] uint32* numPtr)
L_0000: ldarg.0
L_0001: ldfld uint32* StreamingMersenneTwisterRandom::m_paulIndex
L_0006: stloc.0
L_0007: ldloc.0
L_0008: ldarg.0
L_0009: ldfld uint32* modopt([mscorlib]CompilerServices.IsConst)
modopt([mscorlib]CompilerServices.IsConst)
StreamingMersenneTwisterRandom::m_paulEndSentinel
L_000e: beq.s L_001c
L_0010: ldloc.0
L_0011: ldind.i4
L_0012: ldarg.0
L_0013: ldloc.0
L_0014: ldc.i4.4
L_0015: add
L_0016: stfld uint32* StreamingMersenneTwisterRandom::m_paulIndex
L_001b: ret
L_001c: ldarg.0
L_001d: ldfld uint32* modopt([mscorlib]CompilerServices.IsConst)
modopt([mscorlib]CompilerServices.IsConst)
StreamingMersenneTwisterRandom::m_paulStateTable
L_0022: call void modopt([mscorlib]CompilerServices.CallConvStdcall)
::StreamingMersenneTwisterRefresh(uint32*
modopt([mscorlib]CompilerServices.IsConst)
modopt([mscorlib]CompilerServices.IsConst))
L_0027: ldarg.0
L_0028: dup
L_0029: ldfld uint32* StreamingMersenneTwisterRandom::m_paulIndex
L_002e: ldc.i4 0x9bc
L_0033: sub
L_0034: stfld uint32* StreamingMersenneTwisterRandom::m_paulIndex
L_0039: ldarg.0
L_003a: ldfld uint32* modopt([mscorlib]CompilerServices.IsConst)
modopt([mscorlib]CompilerServices.IsConst)
StreamingMersenneTwisterRandom::m_paulStateTable
L_003f: ldind.i4
L_0040: ret
}
*** ASM - DEBUG CODE - INSIDE DEBUGGER ***
382: virtual unsigned __int32 NextUInt()
383: {
384: if (m_paulIndex != m_paulEndSentinel)
00000000 57 push edi
00000001 56 push esi
00000002 53 push ebx
00000003 8B F1 mov esi,ecx
00000005 83 3D 08 6C AE 05 00 cmp dword ptr ds:[05AE6C08h],0
0000000c 74 05 je 00000013
0000000e E8 B4 24 CD 73 call 73CD24C7
00000013 33 FF xor edi,edi
00000015 8B 46 04 mov eax,dword ptr [esi+4]
00000018 3B 46 10 cmp eax,dword ptr [esi+10h]
0000001b 74 0E je 0000002B
385: {
386: return *m_paulIndex++;
0000001d 8B 46 04 mov eax,dword ptr [esi+4]
00000020 8B 18 mov ebx,dword ptr [eax]