P/Invoking unmanaged C++ code from c# - Getting a "tried to access protected memory error"

In short, I'm trying to do SIMD math from C# using an unmanaged C++ multiply function and messing with pointers.

C++ function:

// Multiplies two 4-float vectors component-wise and writes the product back
// into the storage pointed to by `a`; `b` is only read.
//
// The parameters stay typed as __m128* so existing callers keep working, but
// the memory is accessed as four packed floats with NO alignment requirement.
extern "C" __declspec(dllexport) void SIMDVector4Mult( __m128* a, __m128* b )
{
    // Dereferencing an __m128* (`*a`) allows the compiler to emit an aligned
    // load, which raises an access violation when the address is not a
    // multiple of 16. Pointers marshalled from managed code carry no such
    // guarantee, so load and store through the explicitly unaligned intrinsics.
    const __m128 lhs = _mm_loadu_ps( reinterpret_cast<const float*>( a ) );
    const __m128 rhs = _mm_loadu_ps( reinterpret_cast<const float*>( b ) );

    _mm_storeu_ps( reinterpret_cast<float*>( a ), _mm_mul_ps( lhs, rhs ) );
}

C# SIMDVector4 struct:

/// <summary>
/// Four packed floats whose component-wise product is computed by the native
/// <c>SIMDVector4Mult</c> export of SIMDMathLibrary.dll.
/// </summary>
[StructLayout( LayoutKind.Sequential, Size = 16 )]
struct SIMDVector4
{
    float x;
    float y;
    float z;
    float w;

    /// <summary>Initializes all four components.</summary>
    public SIMDVector4( float x, float y, float z, float w )
    {
        this.x = x;
        this.y = y;
        this.z = z;
        this.w = w;
    }

    /// <summary>Formats the vector as "X: .., Y: .., Z: .., W: ..".</summary>
    public override string ToString( )
    {
        return "X: " + x + ", Y: " + y + ", Z: " + z + ", W: " + w;
    }

    /// <summary>
    /// Component-wise product of <paramref name="a"/> and <paramref name="b"/>.
    /// </summary>
    public static unsafe SIMDVector4 operator *( SIMDVector4 a, SIMDVector4 b )
    {
        // StructLayout's Size only fixes the struct's size; it does not make
        // the CLR place instances on 16-byte boundaries. The native routine
        // takes __m128* arguments, so handing it `ref a` can fault with
        // "Attempted to read or write protected memory". Over-allocate a
        // scratch area on the stack and round its start up to a multiple of 16
        // so both operands are guaranteed to be aligned.
        byte* scratch = stackalloc byte[ 2 * 16 + 15 ];
        SIMDVector4* operands = (SIMDVector4*)( ( (long)scratch + 15L ) & ~15L );

        operands[0] = a;
        operands[1] = b;

        // The native side stores the product over its first argument.
        SIMDVector4Mult( operands, operands + 1 );
        return operands[0];
    }

    [DllImport("SIMDMathLibrary.dll", CallingConvention = CallingConvention.Cdecl )]
    private static extern unsafe void SIMDVector4Mult( SIMDVector4* a, SIMDVector4* b );
}

The intent is to add SIMD math to my C# code base, as well as getting some practice with P/Invoke and combining C# and C++.

It works if I receive them as float* variables, memcpy them to temporary __m128 variables, do the multiplication, then memcpy the result back. But that's slower than just multiplying each component individually in C#. I tested this against an optimized Vector4 class; the pure C# version achieves roughly ten times the performance of the copy-multiply-copy method.

I've tried almost a dozen different things, including the current setup, which runs into the protected memory read/write access error mentioned in the title: "Attempted to read or write protected memory. This is often an indication that other memory is corrupt."

How do I fix this error? Should I fix this error? Should I try a different method of achieving the intent altogether?


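Here is an updated version that should work. The struct now wraps a pointer to aligned memory, so each vector costs an extra 4-8 bytes for the pointer. I'm not sure how large the performance hit from the extra indirection is. VirtualAlloc hands out memory in large, aligned regions, so we allocate one big chunk at a time and carve many vectors out of it.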

The example below is very sloppy. You might also want to look at using C++/CLI instead, if possible.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Runtime.InteropServices;
using System.Runtime.ConstrainedExecution;

namespace ConsoleApplication1
    /// <summary>
    /// Console entry point demonstrating <c>SIMDVector4</c> values backed by an
    /// <c>AlignedBlock</c>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Allocates one block sized from the allocation granularity, seeds every
        /// slot with (1, 2, 3, 4), then multiplies the vectors in the first three slots.
        /// </summary>
        unsafe static void Main(string[] args)
        {
            int slotCount = AlignedBlock.AllocationGranularity / SIMDVector4.Sizeof;

            using (AlignedBlock block = new AlignedBlock(SIMDVector4.Sizeof, slotCount))
            {
                // Seed every slot in the block.
                int slot = 0;
                while (slot < block.Count)
                {
                    SIMDVector4.Create(block[slot], 1, 2, 3, 4);
                    slot++;
                }

                // Re-initialise the three slots that take part in the product.
                SIMDVector4 a = SIMDVector4.Create(block[0], 1, 2, 3, 4);
                SIMDVector4 b = SIMDVector4.Create(block[1], 1, 2, 3, 4);
                SIMDVector4 c = SIMDVector4.Create(block[2], 5, 6, 7, 8);

                SIMDVector4 d = a * b * c;
            }
        }
    }


    /// <summary>
    /// Owns one region obtained from <c>VirtualAlloc</c> and exposes it as
    /// <see cref="Count"/> consecutive slots of <see cref="ItemSize"/> bytes each.
    /// The region is released by <see cref="Dispose"/> or, failing that, by the finalizer.
    /// </summary>
    public sealed unsafe class AlignedBlock : CriticalFinalizerObject, IDisposable
    {
        // dwFreeType value passed to VirtualFree to release the whole region (MEM_RELEASE).
        private const uint MemRelease = 0x8000;

        // Base address of the region; null once released (or if allocation never succeeded).
        private void* ptr;

        /// <summary>Caches the page size and allocation granularity reported by GetSystemInfo.</summary>
        static AlignedBlock()
        {
            SYSTEM_INFO info = new SYSTEM_INFO();
            NativeMethods.GetSystemInfo(ref info);
            PageSize = (int)info.dwPageSize;
            AllocationGranularity = (int)info.dwAllocationGranularity;
        }

        /// <summary>Allocates a read/write region of <paramref name="itemSize"/> * <paramref name="count"/> bytes.</summary>
        /// <param name="itemSize">Size of one slot in bytes; must be positive.</param>
        /// <param name="count">Number of slots; must be positive.</param>
        /// <exception cref="ArgumentOutOfRangeException">An argument is not positive, or the total size exceeds <see cref="int.MaxValue"/>.</exception>
        /// <exception cref="System.ComponentModel.Win32Exception">VirtualAlloc failed.</exception>
        public AlignedBlock(int itemSize, int count)
        {
            if (itemSize <= 0)
            {
                throw new ArgumentOutOfRangeException("itemSize", "Item size must be positive.");
            }

            if (count <= 0)
            {
                throw new ArgumentOutOfRangeException("count", "Item count must be positive.");
            }

            // ByteSize is an int; reject sizes that would silently wrap around.
            if ((long)itemSize * count > int.MaxValue)
            {
                throw new ArgumentOutOfRangeException("count", "Total block size exceeds Int32.MaxValue bytes.");
            }

            this.ItemSize = itemSize;
            this.Count = count;

            IntPtr region = NativeMethods.VirtualAlloc(
                IntPtr.Zero,
                new UIntPtr((uint)this.ByteSize),
                AllocationType.COMMIT | AllocationType.RESERVE,
                MemoryProtection.READWRITE);

            if (region == IntPtr.Zero)
            {
                // Surface the failure here instead of letting callers dereference a null base later.
                throw new System.ComponentModel.Win32Exception(Marshal.GetLastWin32Error());
            }

            this.ptr = region.ToPointer();
        }

        /// <summary>Releases the region if <see cref="Dispose"/> was never called.</summary>
        ~AlignedBlock()
        {
            this.Release();
        }

        public static int PageSize { get; private set; }
        public static int AllocationGranularity { get; private set; }
        public int ItemSize { get; private set; }
        public int Count { get; private set; }

        /// <summary>Total size of the region in bytes.</summary>
        public int ByteSize
        {
            get
            {
                return this.Count * this.ItemSize;
            }
        }

        /// <summary>Address of the slot at <paramref name="index"/>.</summary>
        /// <exception cref="ObjectDisposedException">The block has been released.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is outside [0, Count).</exception>
        public void* this[int index]
        {
            get
            {
                if (this.ptr == null)
                {
                    throw new ObjectDisposedException("AlignedBlock");
                }

                if (index < 0 || index >= this.Count)
                {
                    throw new ArgumentOutOfRangeException("index");
                }

                int offset = this.ItemSize * index;
                return ((byte*)this.ptr) + offset;
            }
        }

        /// <summary>Releases the region. Safe to call more than once.</summary>
        public void Dispose()
        {
            this.Release();
            GC.SuppressFinalize(this);
        }

        // Shared by Dispose and the finalizer; never throws.
        private void Release()
        {
            if (this.ptr == null)
            {
                return;
            }

            // With MEM_RELEASE the size argument must be zero.
            NativeMethods.VirtualFree(new IntPtr(this.ptr), UIntPtr.Zero, MemRelease);
            this.ptr = null;
        }
    }

    /// <summary>
    /// A view over four consecutive floats stored in caller-owned memory.
    /// The struct holds only the pointer; a default instance is "empty" and reads as zeros.
    /// </summary>
    unsafe struct SIMDVector4
    {
        /// <summary>Bytes occupied by the four components.</summary>
        public const int Sizeof = 16;

        private float* ptr;

        /// <summary>
        /// Writes (x, y, z, w) to the four floats at <paramref name="ptr"/> and returns a vector over them.
        /// </summary>
        /// <param name="ptr">Start of at least 16 writable bytes. NOTE(review): the native multiply
        /// presumably expects this to be 16-byte aligned - confirm against the DLL.</param>
        /// <exception cref="ArgumentNullException"><paramref name="ptr"/> is null.</exception>
        public static SIMDVector4 Create(void* ptr, float x, float y, float z, float w)
        {
            if (ptr == null)
            {
                throw new ArgumentNullException("ptr");
            }

            float* value = (float*)ptr;

            // Each component goes to its own element; writing all four through
            // *value would leave only w in slot 0 and y/z/w uninitialised.
            value[0] = x;
            value[1] = y;
            value[2] = z;
            value[3] = w;

            return new SIMDVector4(value);
        }

        private SIMDVector4(float* ptr)
        {
            this.ptr = ptr;
        }

        /// <summary>True when the vector has no backing storage.</summary>
        public bool IsEmpty
        {
            get { return this.ptr == null; }
        }

        public float x
        {
            get { return this.Read(0); }
            set { this.Write(0, value); }
        }

        public float y
        {
            get { return this.Read(1); }
            set { this.Write(1, value); }
        }

        public float z
        {
            get { return this.Read(2); }
            set { this.Write(2, value); }
        }

        public float w
        {
            get { return this.Read(3); }
            set { this.Write(3, value); }
        }

        /// <summary>Formats the vector as "X: .., Y: .., Z: .., W: ..".</summary>
        public override string ToString()
        {
            return "X: " + x + ", Y: " + y + ", Z: " + z + ", W: " + w;
        }

        /// <summary>
        /// Multiplies component-wise via the native routine. The returned vector shares
        /// <paramref name="a"/>'s storage, so <paramref name="a"/> is overwritten with the product.
        /// </summary>
        /// <exception cref="InvalidOperationException">Either operand is empty.</exception>
        public static SIMDVector4 operator *(SIMDVector4 a, SIMDVector4 b)
        {
            if (a.IsEmpty || b.IsEmpty)
            {
                // Passing a null pointer to native code would crash the process.
                throw new InvalidOperationException("Cannot multiply an empty SIMDVector4.");
            }

            SIMDVector4Mult(a.ptr, b.ptr);
            return new SIMDVector4(a.ptr);
        }

        // Component read; an empty vector reads as 0.
        private float Read(int component)
        {
            return this.IsEmpty ? 0f : this.ptr[component];
        }

        // Component write; silently ignored on an empty vector.
        private void Write(int component, float value)
        {
            if (!this.IsEmpty)
            {
                this.ptr[component] = value;
            }
        }

        // NOTE(review): absolute, machine-specific path - adjust to wherever the native DLL is deployed.
        [DllImport(@"C:\Users\xxx\Documents\Visual Studio 2010\Projects\TestDll\Debug\TestDll.dll", CallingConvention = CallingConvention.Cdecl)]
        private static extern unsafe void SIMDVector4Mult(void* a, void* b);
    }

    /// <summary>P/Invoke declarations for the kernel32 functions used by this sample.</summary>
    internal static class NativeMethods
    {
        /// <summary>Releases or decommits a region previously obtained from <see cref="VirtualAlloc"/>.</summary>
        /// <returns>true on success; on failure the error is available via Marshal.GetLastWin32Error.</returns>
        [DllImport("kernel32.dll", SetLastError = true)]
        [return: MarshalAs(UnmanagedType.Bool)]
        public static extern bool VirtualFree(IntPtr lpAddress, UIntPtr dwSize,
               uint dwFreeType);

        /// <summary>Reserves and/or commits a region of pages.</summary>
        /// <returns>Base address of the region, or IntPtr.Zero on failure.</returns>
        [DllImport("kernel32.dll", SetLastError = true)]
        public static extern IntPtr VirtualAlloc(IntPtr lpAddress, UIntPtr dwSize,
               AllocationType flAllocationType, MemoryProtection flProtect);

        /// <summary>Fills <paramref name="lpSystemInfo"/> with information about the current system.</summary>
        // An extern method needs a DllImport to bind to; without it the call cannot resolve at run time.
        [DllImport("kernel32.dll")]
        public static extern void GetSystemInfo([MarshalAs(UnmanagedType.Struct)] ref SYSTEM_INFO lpSystemInfo);
    }


    /// <summary>
    /// Managed counterpart of the structure filled in by <c>GetSystemInfo</c>.
    /// Field order and widths are part of the interop contract and must not be rearranged.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct SYSTEM_INFO
    {
        // Leading processor-information member (see _PROCESSOR_INFO_UNION).
        internal _PROCESSOR_INFO_UNION uProcessorInfo;

        // Read by AlignedBlock to populate its PageSize property.
        public uint dwPageSize;

        public IntPtr lpMinimumApplicationAddress;
        public IntPtr lpMaximumApplicationAddress;
        public IntPtr dwActiveProcessorMask;
        public uint dwNumberOfProcessors;
        public uint dwProcessorType;

        // Read by AlignedBlock to populate its AllocationGranularity property.
        public uint dwAllocationGranularity;

        public ushort dwProcessorLevel;
        public ushort dwProcessorRevision;
    }

    /// <summary>
    /// First member of SYSTEM_INFO. Natively this is a 4-byte union: either a single
    /// 32-bit OEM id, or a 16-bit processor architecture followed by a 16-bit reserved word.
    /// </summary>
    // The fields must overlap. Laid out sequentially the struct would be 8 bytes and
    // every SYSTEM_INFO field after it (page size, allocation granularity, ...) would
    // be read from the wrong offset.
    [StructLayout(LayoutKind.Explicit)]
    public struct _PROCESSOR_INFO_UNION
    {
        [FieldOffset(0)]
        internal uint dwOemId;

        [FieldOffset(0)]
        internal ushort wProcessorArchitecture;

        [FieldOffset(2)]
        internal ushort wReserved;
    }

    /// <summary>
    /// Values for the <c>flAllocationType</c> argument of <c>VirtualAlloc</c>.
    /// Each member is a distinct bit, so members may be combined with <c>|</c>.
    /// </summary>
    // [Flags] makes combined values format as a list of member names instead of a bare number.
    [Flags]
    public enum AllocationType : uint
    {
        COMMIT = 0x1000,
        RESERVE = 0x2000,
        RESET = 0x80000,
        LARGE_PAGES = 0x20000000,
        PHYSICAL = 0x400000,
        TOP_DOWN = 0x100000,
        WRITE_WATCH = 0x200000
    }

    /// <summary>
    /// Values for the <c>flProtect</c> argument of <c>VirtualAlloc</c>.
    /// The *_Modifierflag members are intended to be OR-ed onto one of the base protection values.
    /// </summary>
    // [Flags] makes a base value combined with a modifier format as member names instead of a bare number.
    [Flags]
    public enum MemoryProtection : uint
    {
        EXECUTE = 0x10,
        EXECUTE_READ = 0x20,
        EXECUTE_READWRITE = 0x40,
        EXECUTE_WRITECOPY = 0x80,
        NOACCESS = 0x01,
        READONLY = 0x02,
        READWRITE = 0x04,
        WRITECOPY = 0x08,
        GUARD_Modifierflag = 0x100,
        NOCACHE_Modifierflag = 0x200,
        WRITECOMBINE_Modifierflag = 0x400
    }

Upvotes: 2

