Mokus
Mokus

Reputation: 10400

How can I initialize the Raspberry properly?

I wrote a motor controller and tested it on a Raspberry Pi running the Arch Linux ARM distro; calculating the control signal took ~0.4ms. I thought I could do better with a real-time OS, so I started with ChibiOS, but there the runtime was ~2.5ms. First I used the Crossfire cross compiler, then I switched to Linaro; with Linaro the runtime was a bit worse, ~2.7ms. What can be the problem? Is it possible that I'm not initializing the HW in an optimal way?

     /*
     * Stack pointers initialization.
     *
     * Excerpt of the reset/startup path (its beginning and end are not
     * part of this excerpt).  The visible code:
     *   1. gives each ARM processor mode its own stack, carved downward
     *      from __ram_end__,
     *   2. copies 64 bytes from 0x8000 to address 0,
     *   3. enables access to the VFP coprocessor,
     *   4. loads the registers for what is presumably a .bss clear loop.
     *
     * r0 is the running "top of next stack" address; after every mode's
     * sp is set, that mode's stack size is subtracted so the next mode's
     * stack begins directly below it.  MODE_*, I_BIT and F_BIT are
     * defined outside this excerpt (presumably the CPSR mode encodings
     * and the IRQ/FIQ mask bits -- TODO confirm).
     *
     * NOTE(review): nothing in this excerpt enables the instruction or
     * data cache, branch prediction or the MMU.  If that is not done
     * elsewhere, it would be the first thing to check for the slow
     * run time -- confirm against the rest of the startup code.
     */
    ldr     r0, =__ram_end__
    /* Undefined */
    msr     CPSR_c, #MODE_UND | I_BIT | F_BIT
    mov     sp, r0
    ldr     r1, =__und_stack_size__
    sub     r0, r0, r1
    /* Abort */
    msr     CPSR_c, #MODE_ABT | I_BIT | F_BIT
    mov     sp, r0
    ldr     r1, =__abt_stack_size__
    sub     r0, r0, r1
    /* FIQ */
    msr     CPSR_c, #MODE_FIQ | I_BIT | F_BIT
    mov     sp, r0
    ldr     r1, =__fiq_stack_size__
    sub     r0, r0, r1
    /* IRQ */
    msr     CPSR_c, #MODE_IRQ | I_BIT | F_BIT
    mov     sp, r0
    ldr     r1, =__irq_stack_size__
    sub     r0, r0, r1
    /* Supervisor */
    msr     CPSR_c, #MODE_SVC | I_BIT | F_BIT
    mov     sp, r0
    ldr     r1, =__svc_stack_size__
    sub     r0, r0, r1
    /* System */
    /* Last mode switched to; execution continues in this mode and no
       further size is subtracted from r0 after sp is set. */
    msr     CPSR_c, #MODE_SYS | I_BIT | F_BIT
    mov     sp, r0

    /* Copy 16 words (2 x 8 registers = 64 bytes) from 0x8000 to 0x0000.
       Presumably this relocates the exception vectors (and the words
       that follow them) from the image load address to the vector base
       at address 0 -- TODO confirm against the vectors section. */
    mov r0,#0x8000
    mov r1,#0x0000
    ldmia r0!,{r2,r3,r4,r5,r6,r7,r8,r9}
    stmia r1!,{r2,r3,r4,r5,r6,r7,r8,r9}
    ldmia r0!,{r2,r3,r4,r5,r6,r7,r8,r9}
    stmia r1!,{r2,r3,r4,r5,r6,r7,r8,r9}


    /* Read-modify-write of CP15 c1,c0,2 (Coprocessor Access Control
       Register): 0x300000 sets bits 21:20 and 0xC00000 sets bits 23:22,
       i.e. the access fields for coprocessors 10 and 11.  Then bit 30
       of FPEXC is set, which is the VFP enable bit. */
    ;@ enable fpu
    mrc p15, 0, r0, c1, c0, 2
    orr r0,r0,#0x300000 ;@ single precision
    orr r0,r0,#0xC00000 ;@ double precision
    mcr p15, 0, r0, c1, c0, 2
    mov r0,#0x40000000
    fmxr fpexc,r0
    /* r0 = 0, r1 = _bss_start, r2 = _bss_end.  Presumably the inputs of
       a loop that zero-fills .bss; the loop itself is not part of this
       excerpt. */
    mov     r0, #0
    ldr     r1, =_bss_start
    ldr     r2, =_bss_end

And the memory setup:

/*
 * Linker script for the bare-metal image.
 *
 * Defines the per-mode stack sizes, a single RAM region, the output
 * section layout (.text, unwind tables, .data, .bss) and the symbols
 * that delimit heap and stacks.
 *
 * Stack sizes are in bytes: the startup code subtracts them from
 * __ram_end__ one after another to place each processor mode's stack.
 */
__und_stack_size__  = 0x0004;
__abt_stack_size__  = 0x0004;
__fiq_stack_size__  = 0x0010;
__irq_stack_size__  = 0x0080;
__svc_stack_size__  = 0x0004;
__sys_stack_size__  = 0x0400;
__stacks_total_size__   = __und_stack_size__ + __abt_stack_size__ + __fiq_stack_size__ + __irq_stack_size__ + __svc_stack_size__ + __sys_stack_size__;

/*
 * One RAM region starting at 0x8000.
 * NOTE(review): because ORIGIN is 0x8000, __ram_end__ below evaluates to
 * 0x8000 + 0x06000000 - 0x20 = 0x06007FE0, not 0x06000000 - 0x20 --
 * confirm this is still inside the memory available to the ARM core.
 */
MEMORY
{
    ram : org = 0x8000, len = 0x06000000 - 0x20
}

__ram_start__       = ORIGIN(ram);
__ram_size__        = LENGTH(ram);
__ram_end__     = __ram_start__ + __ram_size__;

SECTIONS
{
    /* NOTE(review): the sections below are assigned with "> ram", so
       this presumably does not place them at address 0 -- confirm with
       the linker map file. */
    . = 0;

    /* Code and read-only data; the "vectors" input section comes first
       and is KEEPed so it is not discarded by section garbage
       collection. */
    .text : ALIGN(16) SUBALIGN(16)
    {
        _text = .;
        KEEP(*(vectors))
        *(.text)
        *(.text.*)
        *(.rodata)
        *(.rodata.*)
        *(.glue_7t)
        *(.glue_7)
        *(.gcc*)
        *(.ctors)
        *(.dtors)
    } > ram

    /* ARM exception-unwinding tables, bracketed by __exidx_start/end. */
    .ARM.extab : {*(.ARM.extab* .gnu.linkonce.armextab.*)} > ram

    __exidx_start = .;
    .ARM.exidx : {*(.ARM.exidx* .gnu.linkonce.armexidx.*)} > ram
    __exidx_end = .;

    /* NOTE(review): unlike the other sections, these two have no
       "> ram" region assignment -- confirm that is intended. */
    .eh_frame_hdr : {*(.eh_frame_hdr)}

    .eh_frame : ONLY_IF_RO {*(.eh_frame)}

    . = ALIGN(4);
    _etext = .;
    _textdata = _etext;

    /* Initialised data, including code placed in .ramtext. */
    .data :
    {
        _data = .;
        *(.data)
        . = ALIGN(4);
        *(.data.*)
        . = ALIGN(4);
        *(.ramtext)
        . = ALIGN(4);
        _edata = .;
    } > ram 

    /* Zero-initialised data; _bss_start/_bss_end are the symbols the
       startup code loads before its .bss handling. */
    .bss :
    {
        _bss_start = .;
        *(.bss)
        . = ALIGN(4);
        *(.bss.*)
        . = ALIGN(4);
        *(COMMON)
        . = ALIGN(4);
        _bss_end = .;
    } > ram    
}

/* End-of-image markers taken from the location counter after SECTIONS. */
PROVIDE(end = .);
_end = .;

/* The heap runs from the end of the image up to the bottom of the
   reserved stack area; __heap_end__ and __main_thread_stack_base__ are
   the same address. */
__heap_base__              = _end;
__heap_end__               = __ram_end__ - __stacks_total_size__;
__main_thread_stack_base__ = __ram_end__ - __stacks_total_size__;

Where do I make the mistake(s)?

Upvotes: 2

Views: 383

Answers (1)

FrankH.
FrankH.

Reputation: 18217

A long time ago (yes, that means sometime in the previous millennium), I used the old PC Speaker pcsp device driver (a little more current patch here) to control stepper motors via a relay attached to the data lines of the parallel port.
Note that's not the same driver as the current pcspkr driver (which only writes to the actual speaker, not to the parallel port); the parallel-output-capable parts of pcsp were never ported to the 2.6 audio architecture.

The trick there is that the driver can register a (high-priority, if needed) interrupt routine that does the actual device register / IO port writes to change the line state. As a result, you simply ioctl() the sample rate to the driver, and then just asynchronously write "ramps" (of data signals to step up/down to/from a certain speed or to perform a number of steps) created in-memory - the driver will then spool them for you, without the need for additional timing-/scheduling-sensitive code.

In the end you got an 8bit digital signal on the parallel port data pins, with timing precision as high as your timer interrupt allows.
There were sufficient lines to drive a stepper; if you wanted to make it turn a given number of steps, you had to:

  • create a "ramp up" to speed it up from still to fastest
  • create a "rect wave" to keep it turning
  • create a "ramp down" to slow it down to still again

If the number of steps was small, write the whole thing in one go; otherwise, write the ramp-up, then write as many of the rect-wave blocks as needed, then the ramp-down. Although you'd program possibly thousands of steps in one go, you'd only write three blocks of memory of a few kB each, and the driver's interrupt handler does the rest.

It sounded rather funny if you attached a resistor-array D/A converter ;-)

The approach can be generalized to the Raspberry Pi: from the interrupt routine, simply write a GPIO control register (on ARM, device registers are always memory mapped, so it's simply a memory access).

Decoupling the "ramp" / "control signal" generation from the timing-sensitive state change (the "control signal application", in effect) and delegating the latter to the interrupt part of a device driver allows you to do such tasks with "normal" Linux.

Your timing precision, again, is limited by the rate and jitter of your timer interrupt. The Raspberry Pi is capable of running higher timer interrupt rates than an i386 was. I'm pretty sure 1ms isn't a challenge with this approach (it wasn't in 1995). The methodology depends, as said, on the ability to precreate the signal.

Upvotes: 3

Related Questions