platelet
platelet

Reputation: 185

There is a huge speed difference between reading and writing in DRAM, is this normal?

I want to measure the bandwidth of my DRAM during reading and writing.

Measurement method: repeatedly scan a 128MB array, only accessing the first 4 bytes of each cache line. Reading uses add eax/ebx, [mem], writing uses mov [mem], eax.

The expected result: the bandwidth of writing is 1/2 of the bandwidth of reading, because the cache uses a write-back policy.

The actual result: the bandwidth of writing is only 1/20 of the bandwidth of reading.

Is there any problem with my measurement method? Is the bandwidth difference between reading and writing really that huge?

Code:

asm (R"(
.bss
.align 64
    A: .zero 128 * 1024 * 1024
.text
)");
void read() {
    asm volatile (R"(
.intel_syntax noprefix

lea rdi, A[rip]
lea rsi, [rdi + 128 * 1024 * 1024]
.align 64
1:
    add eax, [rdi + 64 * 0]
    add ebx, [rdi + 64 * 1]
    add ecx, [rdi + 64 * 2]
    add edx, [rdi + 64 * 3]
    add eax, [rdi + 64 * 4]
    add ebx, [rdi + 64 * 5]
    add ecx, [rdi + 64 * 6]
    add edx, [rdi + 64 * 7]
    add rdi, 64 * 8
    cmp rdi, rsi
    jne 1b

.att_syntax
)"::: "rdi", "rsi", "eax", "ebx", "ecx", "edx"
    );
}
void write() {
    asm volatile (R"(
.intel_syntax noprefix

lea rdi, A[rip]
lea rsi, [rdi + 128 * 1024 * 1024]
.align 64
1:
    mov [rdi + 64 * 0], al
    mov [rdi + 64 * 1], eax
    mov [rdi + 64 * 2], eax
    mov [rdi + 64 * 3], eax
    mov [rdi + 64 * 4], eax
    mov [rdi + 64 * 5], eax
    mov [rdi + 64 * 6], eax
    mov [rdi + 64 * 7], eax
    add rdi, 64 * 8
    cmp rdi, rsi
    jne 1b

.att_syntax
)"::: "rdi", "rsi"
    );
}

#include <chrono>
#include <iostream>

using namespace std;

void measure(const char* name, void (*f)()) {
    for (int i = 0; i < 20; i++) f();
    auto start = chrono::steady_clock::now();
    for (int i = 0; i < 200; i++) f();
    auto end = chrono::steady_clock::now();
    cout << name << ": " << (end - start).count() * 1e-6 << " ms" << endl;
}
int main() {
    measure("reading", read);
    measure("writing", write);
}

My output:

reading: 113.529 ms
writing: 2254.57 ms

Upvotes: 2

Views: 149

Answers (0)

Related Questions