Merge branch 'feature/tls_support' into 'master'

FreeRTOS TLS support

See merge request idf/esp-idf!1902
This commit is contained in:
Ivan Grokhotkov 2018-02-09 12:34:20 +08:00
commit 417ef19084
6 changed files with 210 additions and 2 deletions

View File

@ -194,6 +194,13 @@ SECTIONS
*(.gnu.linkonce.lit4.*)
_lit4_end = ABSOLUTE(.);
. = ALIGN(4);
_thread_local_start = ABSOLUTE(.);
*(.tdata)
*(.tdata.*)
*(.tbss)
*(.tbss.*)
_thread_local_end = ABSOLUTE(.);
. = ALIGN(4);
} >drom0_0_seg
.flash.text :

View File

@ -380,6 +380,9 @@ is used in assert() statements. */
* @return pdPASS if the task was successfully created and added to a ready
* list, otherwise an error code defined in the file projdefs.h
*
* @note If program uses thread local variables (ones specified with "__thread" keyword)
* then storage for them will be allocated on the task's stack.
*
* Example usage:
* @code{c}
* // Task to be created.
@ -530,6 +533,9 @@ is used in assert() statements. */
* are NULL then the task will not be created and
* errCOULD_NOT_ALLOCATE_REQUIRED_MEMORY is returned.
*
* @note If program uses thread local variables (ones specified with "__thread" keyword)
* then storage for them will be allocated on the task's stack.
*
* Example usage:
* @code{c}
*

View File

@ -92,6 +92,7 @@
*/
#include <stdlib.h>
#include <string.h>
#include <xtensa/config/core.h>
#include "xtensa_rtos.h"
@ -146,9 +147,24 @@ StackType_t *pxPortInitialiseStack( StackType_t *pxTopOfStack, TaskFunction_t px
#if XCHAL_CP_NUM > 0
uint32_t *p;
#endif
uint32_t *threadptr;
void *task_thread_local_start;
extern int _thread_local_start, _thread_local_end, _rodata_start;
// TODO: check that TLS area fits the stack
uint32_t thread_local_sz = (uint8_t *)&_thread_local_end - (uint8_t *)&_thread_local_start;
/* Create interrupt stack frame aligned to 16 byte boundary */
sp = (StackType_t *) (((UBaseType_t)(pxTopOfStack + 1) - XT_CP_SIZE - XT_STK_FRMSZ) & ~0xf);
thread_local_sz = ALIGNUP(0x10, thread_local_sz);
/* Initialize task's stack so that we have the following structure at the top:
----LOW ADDRESSES ----------------------------------------HIGH ADDRESSES----------
task stack | interrupt stack frame | thread local vars | co-processor save area |
----------------------------------------------------------------------------------
| |
SP pxTopOfStack
All parts are aligned to 16 byte boundary. */
sp = (StackType_t *) (((UBaseType_t)(pxTopOfStack + 1) - XT_CP_SIZE - thread_local_sz - XT_STK_FRMSZ) & ~0xf);
/* Clear the entire frame (do not use memset() because we don't depend on C library) */
for (tp = sp; tp <= pxTopOfStack; ++tp)
@ -178,6 +194,14 @@ StackType_t *pxPortInitialiseStack( StackType_t *pxTopOfStack, TaskFunction_t px
frame->vpri = 0xFFFFFFFF;
#endif
/* Init threadptr reg and TLS vars */
task_thread_local_start = (void *)(((uint32_t)pxTopOfStack - XT_CP_SIZE - thread_local_sz) & ~0xf);
memcpy(task_thread_local_start, &_thread_local_start, thread_local_sz);
threadptr = (uint32_t *)(sp + XT_STK_EXTRA);
/* shift threadptr by the offset of _thread_local_start from DROM start;
need to take into account extra 16 bytes offset */
*threadptr = (uint32_t)task_thread_local_start - ((uint32_t)&_thread_local_start - (uint32_t)&_rodata_start) - 0x10;
#if XCHAL_CP_NUM > 0
/* Init the coprocessor save area (see xtensa_context.h) */
/* No access to TCB here, so derive indirectly. Stack growth is top to bottom.

View File

@ -0,0 +1,105 @@
/*
Test for thread local storage support.
*/
#include <string.h>
#include <esp_types.h>
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "unity.h"
#include "sdkconfig.h"
/* Thread-local variables exercised by the test below. Every task running
   task_test_tls() is expected to see its own copy of each of them. */
/* No initializer: the test expects it to read 0 on the first round. */
static __thread int tl_test_var1;
/* Explicitly initialized: the test expects 55 and 44 on the first round. */
static __thread uint8_t tl_test_var2 = 55;
static __thread uint16_t tl_test_var3 = 44;
/* No initializer: the test expects every element to read 0 on the first round. */
static __thread uint8_t tl_test_arr_var[10];
/* Aggregate thread-local variable mixing field widths; the test expects all
   fields (including every element of farr) to read 0 on the first round. */
static __thread struct test_tls_var {
int f32;
uint8_t f8;
uint16_t f16;
uint8_t farr[10];
} tl_test_struct_var;
/*
 * Task body verifying that the calling task works on its own copy of every
 * thread-local test variable.
 *
 * The task runs 5 rounds. In each round it checks that the variables hold the
 * values expected at that point (their initial values in the first round, the
 * values written in the previous round afterwards), then increments every
 * scalar by one, fills both arrays with the round number and sleeps 10 ticks.
 *
 * arg: optional pointer to a bool. When it is non-NULL the flag is set to false
 *      after the last round and the task deletes itself; when it is NULL the
 *      function just returns.
 */
static void task_test_tls(void *arg)
{
    bool *still_running = (bool *)arg;
    uint32_t thread_ptr = (uint32_t)-1;

    /* Values each variable must hold at the start of the upcoming round,
       seeded with the values expected in the very first round. */
    int want_var1 = 0;
    int want_var2 = 55;
    int want_var3 = 44;
    int want_f32 = 0;
    int want_f8 = 0;
    int want_f16 = 0;
    int want_fill = 0;

    /* The value read here is only used to tag the log line below. */
    asm volatile ("rur.threadptr %0":"=r"(thread_ptr));

    for (int round = 0; round < 5; round++) {
        printf("Task[%x]: var = 0x%x 0x%x\n", thread_ptr, tl_test_var1, tl_test_var2);

        /* Verify the state left behind by the previous round. */
        TEST_ASSERT_EQUAL(want_var1, tl_test_var1);
        TEST_ASSERT_EQUAL(want_var2, tl_test_var2);
        TEST_ASSERT_EQUAL(want_var3, tl_test_var3);
        for (size_t idx = 0; idx < sizeof(tl_test_arr_var); idx++) {
            TEST_ASSERT_EQUAL(want_fill, tl_test_arr_var[idx]);
        }
        TEST_ASSERT_EQUAL(want_f32, tl_test_struct_var.f32);
        TEST_ASSERT_EQUAL(want_f8, tl_test_struct_var.f8);
        TEST_ASSERT_EQUAL(want_f16, tl_test_struct_var.f16);
        for (size_t idx = 0; idx < sizeof(tl_test_struct_var.farr); idx++) {
            TEST_ASSERT_EQUAL(want_fill, tl_test_struct_var.farr[idx]);
        }

        /* Next round must observe "current value + 1" for the scalars and
           the current round number in every array element. */
        want_var1 = tl_test_var1 + 1;
        want_var2 = tl_test_var2 + 1;
        want_var3 = tl_test_var3 + 1;
        want_f32 = tl_test_struct_var.f32 + 1;
        want_f8 = tl_test_struct_var.f8 + 1;
        want_f16 = tl_test_struct_var.f16 + 1;
        want_fill = round;

        /* Modify this task's copies. */
        tl_test_var1 += 1;
        tl_test_var2 += 1;
        tl_test_var3 += 1;
        memset(tl_test_arr_var, round, sizeof(tl_test_arr_var));
        tl_test_struct_var.f32 += 1;
        tl_test_struct_var.f8 += 1;
        tl_test_struct_var.f16 += 1;
        memset(tl_test_struct_var.farr, round, sizeof(tl_test_struct_var.farr));

        /* Sleep so that another task running this function gets to run in between. */
        vTaskDelay(10);
    }

    if (still_running == NULL) {
        return;
    }
    /* Report completion to the creator, then remove this task. */
    *still_running = false;
    vTaskDelete(NULL);
}
/*
 * Runs task_test_tls() concurrently in two tasks: one created with dynamic
 * allocation pinned to core 0, and one created from static buffers pinned to
 * the other core (or to core 0 as well in unicore builds). The test case waits
 * until both tasks have cleared their "running" flag.
 */
TEST_CASE("TLS test", "[freertos]")
{
    static StackType_t s_stack[2048];
    /* The TCB buffer is static as well: the statically created task deletes
       itself, and the buffer must stay valid for as long as the kernel may
       still reference it, which can be after this function has returned. */
    static StaticTask_t s_task;
    bool running[2] = {true, true};
#if CONFIG_FREERTOS_UNICORE == 0
    int other_core = 1;
#else
    int other_core = 0;
#endif

    /* Fail right away if the task could not be created; otherwise the wait
       loop below would never terminate. */
    TEST_ASSERT_EQUAL(pdPASS,
                      xTaskCreatePinnedToCore((TaskFunction_t)&task_test_tls, "task_test_tls", 3072,
                                              &running[0], 5, NULL, 0));
    xTaskCreateStaticPinnedToCore((TaskFunction_t)&task_test_tls, "task_test_tls", sizeof(s_stack), &running[1],
                                  5, s_stack, &s_task, other_core);

    /* Each task clears its flag just before deleting itself. */
    while (running[0] || running[1]) {
        vTaskDelay(10);
    }

    /* The flags are cleared before vTaskDelete() is called, so wait a little
       longer to let the deleted tasks be cleaned up before `running` goes out
       of scope and the test case returns. */
    vTaskDelay(10);
}

View File

@ -10,6 +10,7 @@ API Guides
ESP32 Core Dump <core_dump>
Flash Encryption <../security/flash-encryption>
FreeRTOS SMP Changes <freertos-smp>
Thread Local Storage <thread-local-storage>
High Level Interrupts <hlinterrupts>
JTAG Debugging <jtag-debugging/index>
Partition Tables <partition-tables>

View File

@ -0,0 +1,65 @@
Thread Local Storage
====================
Overview
--------
Thread-local storage (TLS) is a mechanism by which variables are allocated such that there
is one instance of the variable per extant thread. ESP-IDF provides three ways to make use
of such variables:
- :ref:`freertos-native`: ESP-IDF FreeRTOS native API.
- :ref:`pthread-api`: ESP-IDF's pthread API.
- :ref:`c11-std`: The C11 standard introduces a special keyword to declare variables as thread local.
.. _freertos-native:
FreeRTOS Native API
--------------------
The ESP-IDF FreeRTOS provides the following API to manage thread local variables:
- :cpp:func:`vTaskSetThreadLocalStoragePointer`
- :cpp:func:`pvTaskGetThreadLocalStoragePointer`
- :cpp:func:`vTaskSetThreadLocalStoragePointerAndDelCallback`
In this case the maximum number of variables that can be allocated is limited by the
``configNUM_THREAD_LOCAL_STORAGE_POINTERS`` macro. Variables are kept in the task control block (TCB)
and accessed by their index. Note that index 0 is reserved for ESP-IDF internal use.
Using this API, the user can allocate thread local variables of arbitrary size and assign them to any number of tasks.
Different tasks can have different sets of TLS variables.
If the size of a variable is more than 4 bytes, the user is responsible for allocating and deallocating memory for it.
Deallocation of a variable is initiated by FreeRTOS when the task is deleted, but the user must provide a function (callback)
to do the proper cleanup.
.. _pthread-api:
Pthread API
----------------
The ESP-IDF provides the following pthread API to manage thread local variables:
- :cpp:func:`pthread_key_create`
- :cpp:func:`pthread_key_delete`
- :cpp:func:`pthread_getspecific`
- :cpp:func:`pthread_setspecific`
This API has all the benefits of the one above, but eliminates some of its limits. The number of variables is
limited only by the size of available memory on the heap.
Due to its dynamic nature, this API introduces additional performance overhead compared to the native one.
.. _c11-std:
C11 Standard
------------
The ESP-IDF FreeRTOS supports thread local variables according to the C11 standard (ones specified with the ``__thread`` keyword).
For details on this GCC feature please see https://gcc.gnu.org/onlinedocs/gcc-5.5.0/gcc/Thread-Local.html#Thread-Local.
Storage for variables of this kind is allocated on the task's stack.
Note that the area for all such variables in the program will be allocated on the stack of
every task in the system, even if that task does not use such variables at all. For example,
ESP-IDF system tasks (like the ``ipc`` and ``timer`` tasks etc.) will also have that extra stack space allocated.
So this feature should be used with care. There is a tradeoff: C11 thread local variables are quite handy
to use in programming and can be accessed using just a few Xtensa instructions, but this benefit comes
at the cost of additional stack usage for all tasks in the system.
Due to the static nature of variable allocation, all tasks in the system have the same set of C11 thread local variables.