Reputation: 10400
I wrote a motor controller and I tested on a respberry pi using Arch Arm Linux distro, to calculate the control signal took ~0.4ms, so I thought I can make better if I'm using real time OS, so I started with ChibiOS, but there the runtime was ~2.5ms, first I used Crossfire cross compiler than I switch to linaro, with the linaro the runtime was a bit worse ~2.7ms. What can be the problem? Is there possible that I'm not initializing the HW in an optimal way?
/*
* Stack pointers initialization.
*/
ldr r0, =__ram_end__
/* Undefined */
msr CPSR_c, #MODE_UND | I_BIT | F_BIT
mov sp, r0
ldr r1, =__und_stack_size__
sub r0, r0, r1
/* Abort */
msr CPSR_c, #MODE_ABT | I_BIT | F_BIT
mov sp, r0
ldr r1, =__abt_stack_size__
sub r0, r0, r1
/* FIQ */
msr CPSR_c, #MODE_FIQ | I_BIT | F_BIT
mov sp, r0
ldr r1, =__fiq_stack_size__
sub r0, r0, r1
/* IRQ */
msr CPSR_c, #MODE_IRQ | I_BIT | F_BIT
mov sp, r0
ldr r1, =__irq_stack_size__
sub r0, r0, r1
/* Supervisor */
msr CPSR_c, #MODE_SVC | I_BIT | F_BIT
mov sp, r0
ldr r1, =__svc_stack_size__
sub r0, r0, r1
/* System */
msr CPSR_c, #MODE_SYS | I_BIT | F_BIT
mov sp, r0
mov r0,#0x8000
mov r1,#0x0000
ldmia r0!,{r2,r3,r4,r5,r6,r7,r8,r9}
stmia r1!,{r2,r3,r4,r5,r6,r7,r8,r9}
ldmia r0!,{r2,r3,r4,r5,r6,r7,r8,r9}
stmia r1!,{r2,r3,r4,r5,r6,r7,r8,r9}
;@ enable fpu
mrc p15, 0, r0, c1, c0, 2
orr r0,r0,#0x300000 ;@ single precision
orr r0,r0,#0xC00000 ;@ double precision
mcr p15, 0, r0, c1, c0, 2
mov r0,#0x40000000
fmxr fpexc,r0
mov r0, #0
ldr r1, =_bss_start
ldr r2, =_bss_end
And the memory setup:
__und_stack_size__ = 0x0004;
__abt_stack_size__ = 0x0004;
__fiq_stack_size__ = 0x0010;
__irq_stack_size__ = 0x0080;
__svc_stack_size__ = 0x0004;
__sys_stack_size__ = 0x0400;
__stacks_total_size__ = __und_stack_size__ + __abt_stack_size__ + __fiq_stack_size__ + __irq_stack_size__ + __svc_stack_size__ + __sys_stack_size__;
MEMORY
{
ram : org = 0x8000, len = 0x06000000 - 0x20
}
__ram_start__ = ORIGIN(ram);
__ram_size__ = LENGTH(ram);
__ram_end__ = __ram_start__ + __ram_size__;
SECTIONS
{
. = 0;
.text : ALIGN(16) SUBALIGN(16)
{
_text = .;
KEEP(*(vectors))
*(.text)
*(.text.*)
*(.rodata)
*(.rodata.*)
*(.glue_7t)
*(.glue_7)
*(.gcc*)
*(.ctors)
*(.dtors)
} > ram
.ARM.extab : {*(.ARM.extab* .gnu.linkonce.armextab.*)} > ram
__exidx_start = .;
.ARM.exidx : {*(.ARM.exidx* .gnu.linkonce.armexidx.*)} > ram
__exidx_end = .;
.eh_frame_hdr : {*(.eh_frame_hdr)}
.eh_frame : ONLY_IF_RO {*(.eh_frame)}
. = ALIGN(4);
_etext = .;
_textdata = _etext;
.data :
{
_data = .;
*(.data)
. = ALIGN(4);
*(.data.*)
. = ALIGN(4);
*(.ramtext)
. = ALIGN(4);
_edata = .;
} > ram
.bss :
{
_bss_start = .;
*(.bss)
. = ALIGN(4);
*(.bss.*)
. = ALIGN(4);
*(COMMON)
. = ALIGN(4);
_bss_end = .;
} > ram
}
PROVIDE(end = .);
_end = .;
__heap_base__ = _end;
__heap_end__ = __ram_end__ - __stacks_total_size__;
__main_thread_stack_base__ = __ram_end__ - __stacks_total_size__;
Where do I make the mistake(s)?
Upvotes: 2
Views: 383
Reputation: 18217
A long time ago (yes, that means somewhen in the previous millenium), I used the old PC Speaker pcsp
device driver (a little more current patch here) to control stepper motors via a relay attached to the data lines of the parallel port.
Note that's not the same driver as the current pcspkr
driver (which only writes to the actual speaker, not to the parallel port); the parallel-output-capable parts of pcsp
were never ported to the 2.6 audio architecture.
The trick there is that the driver can register a (high-priority, if needed) interrupt routine that does the actual device register / IO port writes to change the line state. As a result, you simply ioctl()
the sample rate to the driver, and then just asynchronously write "ramps" (of data signals to step up/down to/from a certain speed or to perform a number of steps) created in-memory - the driver will then spool them for you, without the need for additional timing-/scheduling-sensitive code.
In the end you got an 8bit digital signal on the parallel port data pins, with timing precision as high as your timer interrupt allows.
There were sufficient lines to drive a stepper; if you wanted to make it turn a given number of steps, you had to:
If the number of steps was small, write the whole thing in one go, other wise, write the ramp-up, then write as many of the rect-wave blocks as needed, then the ramp down. Although you'd program possibly thousands of steps in one go, you'd only write three blocks of mem a few kB each, and the driver's interrupt handler does the rest.
It sounded rather funny if you attached a resistor-array DAC convertor ;-)
The approach can be generalized to the RaspPI; from the interrupt routine, simply write a GPIO control register (on ARM, device regs are always memory mapped, so it's simply a memory access).
Decoupling the "ramp" / "control signal" generation from the timing-sensitive state change (the "control signal application", in effect) and delegating the latter to the interrupt part of a device driver allows to do such tasks with "normal" Linux.
Your timing precision, again, is limited by rate and jitter of your timer interrupt. The RaspPI is capable of running higher timer interrupt rates than an i386 was. I'm pretty sure 1ms isn't a challenge with this approach (it wasn't in 1995). The methodology depends, as said, on the ability to precreate the signal.
Upvotes: 3