diff --git a/cores/arduino/delay.c b/cores/arduino/delay.c
index 5640e71f1ce4dc0acbe5beb151cf1da036267c04..59189351d9069140bcfc49524123fa07a3a60809 100644
--- a/cores/arduino/delay.c
+++ b/cores/arduino/delay.c
@@ -5,12 +5,13 @@
 extern "C" {
 #endif
 
+/** Tick counter, in units of ms */
+static volatile uint32_t _ulTickCount=0 ;
+
 uint32_t millis( void )
 {
-/* TODO
 // todo: ensure no interrupts
-    return GetTickCount() ;
-*/ return 0ul ;
+  return _ulTickCount ;
 }
 
 // Interrupt-compatible version of micros
@@ -19,28 +20,27 @@ uint32_t millis( void )
 // values to calculate micros. If there is a pending SysTick, add one to the millis counter in the calculation.
 uint32_t micros( void )
 {
-/* TODO
-    uint32_t ticks, ticks2;
-    uint32_t pend, pend2;
-    uint32_t count, count2;
+  uint32_t ticks, ticks2;
+  uint32_t pend, pend2;
+  uint32_t count, count2;
 
-    ticks2  = SysTick->VAL;
-    pend2   = !!((SCB->ICSR & SCB_ICSR_PENDSTSET_Msk)||((SCB->SHCSR & SCB_SHCSR_SYSTICKACT_Msk)))  ;
-    count2  = GetTickCount();
+  ticks2  = SysTick->VAL;
+  pend2   = !!(SCB->ICSR & SCB_ICSR_PENDSTSET_Msk)  ;
+  count2  = _ulTickCount ;
 
-    do {
-        ticks=ticks2;
-        pend=pend2;
-        count=count2;
-        ticks2  = SysTick->VAL;
-        pend2   = !!((SCB->ICSR & SCB_ICSR_PENDSTSET_Msk)||((SCB->SHCSR & SCB_SHCSR_SYSTICKACT_Msk)))  ;
-        count2  = GetTickCount();
-    } while ((pend != pend2) || (count != count2) || (ticks < ticks2));
+  do
+  {
+    ticks=ticks2;
+    pend=pend2;
+    count=count2;
+    ticks2  = SysTick->VAL;
+    pend2   = !!(SCB->ICSR & SCB_ICSR_PENDSTSET_Msk)  ;
+    count2  = _ulTickCount ;
+  } while ((pend != pend2) || (count != count2) || (ticks < ticks2));
 
-    return ((count+pend) * 1000) + (((SysTick->LOAD  - ticks)*(1048576/(F_CPU/1000000)))>>20) ;
-    // this is an optimization to turn a runtime division into two compile-time divisions and
-    // a runtime multiplication and shift, saving a few cycles
-*/ return 0ul ;
+  return ((count+pend) * 1000) + (((SysTick->LOAD  - ticks)*(1048576/(VARIANT_MCK/1000000)))>>20) ;
+  // this is an optimization to turn a runtime division into two compile-time divisions and
+  // a runtime multiplication and shift, saving a few cycles
 }
 
 // original function:
@@ -61,14 +61,17 @@ uint32_t micros( void )
 
 void delay( uint32_t ms )
 {
-/* TODO
-    if (ms == 0)
-        return;
-    uint32_t start = GetTickCount();
-    do {
-        yield();
-    } while (GetTickCount() - start < ms);
-*/
+  if ( ms == 0 )
+  {
+    return ;
+  }
+
+  uint32_t start = _ulTickCount ;
+
+  do
+  {
+    yield() ;
+  } while ( _ulTickCount - start < ms ) ;
 }
 
 #ifdef __cplusplus
diff --git a/cores/arduino/delay.h b/cores/arduino/delay.h
index 2ea5d0c1d53a78084323784a6fb348d77b8de212..f9b08efeff3b608ef09265eb2f261cc46ff72919 100644
--- a/cores/arduino/delay.h
+++ b/cores/arduino/delay.h
@@ -64,16 +64,31 @@ static inline void delayMicroseconds(uint32_t) __attribute__((always_inline, unu
 static inline void delayMicroseconds(uint32_t usec){
     if (usec == 0) return;
     uint32_t n = usec * (VARIANT_MCK / 3000000);
-/*
-    asm volatile(
+
+    __asm__ volatile(
         "L_%=_delayMicroseconds:"       "\n\t"
         "subs   %0, #1"                 "\n\t"
         "bne    L_%=_delayMicroseconds" "\n"
         : "+r" (n) :
     );
-*/
 }
 
+/*
+__attribute__((naked)) static void delay_loop(unsigned n)
+{
+	__asm volatile ("1: subs r0, r0, #1");
+	__asm volatile (" bne 1b");
+	__asm volatile (" bx lr");
+}
+
+void delay_microseconds(unsigned n)
+{
+	// Bogus assumption:
+	// Assume 8 cycles/iteration and running at 80MHz
+	delay_loop(n * 10);
+}
+*/
+
 #ifdef __cplusplus
 }
 #endif