cancel
Showing results for 
Search instead for 
Did you mean: 

How to optimize SDIO WIFI performance?

Hello,

I am working on a project that uses wifi to communicate with a connected device, and latency is critical. We are using the JODY-W263-00B wifi module from U-Blox connected over the SDIO bus, and to measure the latency we simply use the ping command. The average latency of our setup is around 10 ms. As this project is the third generation of our product, we can compare it with the previous generation, which was based on the imx6 chip from NXP; with it we were able to reach an average latency of around 1.8 ms, but we were using a different wifi module (ELLA-W161). To make this comparable we have made a few measurements using different CPUs, wifi modules, and connection types. Our results are in this spreadsheet. The name/board column gives the name of the device to which the wifi module was connected, the WIFI connection column shows how we connected the module/EVK kit, and the columns under Ping [ms] are our measurements. The other columns are, I hope, self-explanatory.

From measurements, you can see that the average latency of wifi modules connected to the imx6 is lower than the latency of STM32MP1-based modules, regardless of the HW connection or used driver. This implies that the issue is with the STM SDIO implementation, either in HW or SW.

For our tests, we used the latest ST ecosystem for the ST devices and the current upstream kernel for the imx6 (kernel 6.1; the driver supports only 5.15, but it still performs better).

Can you please help me confirm this issue and, hopefully, resolve it?

p.s.: the tests were done using wipi wifi module and Windows 11.

2 ACCEPTED SOLUTIONS

Accepted Solutions
Gregory PLANCHON
ST Employee

Hello,

We have found the cause of this problem: to get better performance, the following lines must be added to the device tree:

 brcmf: bcrmf@1 {

          reg = <1>;

          compatible = "brcm,bcm4329-fmac";

+       interrupt-parent = <&gpiod>;

+       interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; /* WL_HOST_WAKE */

+       interrupt-names = "host-wake";

      };

If you have any other questions, don't hesitate to ask me.

Regards,

Grégory

In order to give better visibility on the answered topics, please click on 'Accept as Solution' on the reply which solved your issue or answered your question.

View solution in original post

Hi,
with the recent changes in the 6.1 ST kernel branch that enable the SDIO interrupt, I have managed to resolve the latency issue for the 8987 chipset used in the JODY-W2 module.

If anyone is facing latency issues, you can try applying the attached patch, which backports the new SDIO interrupt functionality to the ST kernel version 5.15, and adding the cap-sdio-irq property to the DTS.

My thanks to the kernel team for implementing this. Do you plan to backport it to the 5.15 kernel as well?

Best regards,

Tomáš.

p.s.: Since I cannot upload the patch as a file, I add it as text.

From 82ee7ffbaa937a7a3edc8ca95aa93bff7ab7d097 Mon Sep 17 00:00:00 2001
From: Tomas Jurena <tjurena@techniserv.cz>
Date: Thu, 29 Jun 2023 05:03:24 +0000
Subject: [PATCH] mmc: mmci: stm32: Backport changes from kernel 6.1

---
drivers/mmc/host/mmci.c | 67 ++++++++++++++++++++++++++++-
drivers/mmc/host/mmci.h | 4 ++
drivers/mmc/host/mmci_stm32_sdmmc.c | 21 +++++++++
3 files changed, 90 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 3d416c4ed..05cfc28a5 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -270,6 +270,7 @@ static struct variant_data variant_stm32_sdmmc = {
.datactrl_any_blocksz = true,
.datactrl_mask_sdio = MCI_DPSM_ST_SDIOEN,
.stm32_idmabsize_mask = GENMASK(12, 5),
+ .use_sdio_irq = true,
.busy_timeout = true,
.busy_detect = true,
.busy_detect_flag = MCI_STM32_BUSYD0,
@@ -296,6 +297,7 @@ static struct variant_data variant_stm32_sdmmcv2 = {
.datactrl_any_blocksz = true,
.datactrl_mask_sdio = MCI_DPSM_ST_SDIOEN,
.stm32_idmabsize_mask = GENMASK(16, 5),
+ .use_sdio_irq = true,
.dma_lli = true,
.busy_timeout = true,
.busy_detect = true,
@@ -392,6 +394,10 @@ static void mmci_write_datactrlreg(struct mmci_host *host, u32 datactrl)
/* Keep busy mode in DPSM if enabled */
datactrl |= host->datactrl_reg & host->variant->busy_dpsm_flag;

+ /* Keep SD I/O interrupt mode enabled */
+ if (host->variant->use_sdio_irq && host->mmc->caps & MMC_CAP_SDIO_IRQ)
+ datactrl |= host->variant->datactrl_mask_sdio;
+
if (host->datactrl_reg != datactrl) {
host->datactrl_reg = datactrl;
writel(datactrl, host->base + MMCIDATACTRL);
@@ -1644,6 +1650,11 @@ static irqreturn_t mmci_irq(int irq, void *dev_id)
mmci_data_irq(host, host->data, status);
}

+ if (host->variant->use_sdio_irq &&
+ host->mmc->caps & MMC_CAP_SDIO_IRQ &&
+ host->ops && host->ops->sdio_irq)
+ host->ops->sdio_irq(host, status);
+
/*
* Busy detection has been handled by mmci_cmd_irq() above.
* Clear the status bit to prevent polling in IRQ context.
@@ -1729,7 +1740,8 @@ static void mmci_set_max_busy_timeout(struct mmc_host *mmc)
return;

if (host->variant->busy_timeout && mmc->actual_clock)
- max_busy_timeout = ~0UL / (mmc->actual_clock / MSEC_PER_SEC);
+ max_busy_timeout = U32_MAX / DIV_ROUND_UP(mmc->actual_clock,
+ MSEC_PER_SEC);

mmc->max_busy_timeout = max_busy_timeout;
}
@@ -1883,6 +1895,45 @@ static int mmci_sig_volt_switch(struct mmc_host *mmc, struct mmc_ios *ios)
return ret;
}

+static void mmci_enable_sdio_irq(struct mmc_host *mmc, int enable)
+{
+ struct mmci_host *host = mmc_priv(mmc);
+ unsigned long flags;
+
+ if (!host->variant->use_sdio_irq)
+ return;
+
+ if (host->ops && host->ops->enable_sdio_irq) {
+ if (enable)
+ /* Keep device active while SDIO IRQ is enabled */
+ pm_runtime_get_sync(mmc_dev(mmc));
+
+ spin_lock_irqsave(&host->lock, flags);
+ host->ops->enable_sdio_irq(host, enable);
+ spin_unlock_irqrestore(&host->lock, flags);
+
+ if (!enable) {
+ pm_runtime_mark_last_busy(mmc_dev(mmc));
+ pm_runtime_put_autosuspend(mmc_dev(mmc));
+ }
+ }
+}
+
+static void mmci_ack_sdio_irq(struct mmc_host *mmc)
+{
+ struct mmci_host *host = mmc_priv(mmc);
+ unsigned long flags;
+
+ if (!host->variant->use_sdio_irq)
+ return;
+
+ if (host->ops && host->ops->enable_sdio_irq) {
+ spin_lock_irqsave(&host->lock, flags);
+ host->ops->enable_sdio_irq(host, 1);
+ spin_unlock_irqrestore(&host->lock, flags);
+ }
+}
+
static struct mmc_host_ops mmci_ops = {
.request = mmci_request,
.pre_req = mmci_pre_request,
@@ -1891,6 +1942,8 @@ static struct mmc_host_ops mmci_ops = {
.get_ro = mmc_gpio_get_ro,
.get_cd = mmci_get_cd,
.start_signal_voltage_switch = mmci_sig_volt_switch,
+ .enable_sdio_irq = mmci_enable_sdio_irq,
+ .ack_sdio_irq = mmci_ack_sdio_irq,
};

static void mmci_probe_level_translator(struct mmc_host *mmc)
@@ -2158,6 +2211,14 @@ static int mmci_probe(struct amba_device *dev,
mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY;
}

+ if (variant->use_sdio_irq && host->mmc->caps & MMC_CAP_SDIO_IRQ) {
+ mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD;
+
+ if (variant->datactrl_mask_sdio)
+ mmci_write_datactrlreg(host,
+ host->variant->datactrl_mask_sdio);
+ }
+
/* Variants with mandatory busy timeout in HW needs R1B responses. */
if (variant->busy_timeout)
mmc->caps |= MMC_CAP_NEED_RSP_BUSY;
@@ -2254,7 +2315,9 @@ static int mmci_probe(struct amba_device *dev,
pm_runtime_set_autosuspend_delay(&dev->dev, 50);
pm_runtime_use_autosuspend(&dev->dev);

- mmc_add_host(mmc);
+ ret = mmc_add_host(mmc);
+ if (ret)
+ goto clk_disable;

pm_runtime_put(&dev->dev);
return 0;
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index e1a9b96a3..a710cd686 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -316,6 +316,7 @@ struct mmci_host;
* @opendrain: bitmask identifying the OPENDRAIN bit inside MMCIPOWER register
* @dma_lli: true if variant has dma link list feature.
* @stm32_idmabsize_mask: stm32 sdmmc idma buffer size.
+ * @use_sdio_irq: allow SD I/O card to interrupt the host
*/
struct variant_data {
unsigned int clkreg;
@@ -360,6 +361,7 @@ struct variant_data {
u32 start_err;
u32 opendrain;
u8 dma_lli:1;
+ u8 use_sdio_irq:1;
u32 stm32_idmabsize_mask;
void (*init)(struct mmci_host *host);
};
@@ -383,6 +385,8 @@ struct mmci_host_ops {
bool (*busy_complete)(struct mmci_host *host, u32 status, u32 err_msk);
void (*pre_sig_volt_switch)(struct mmci_host *host);
int (*post_sig_volt_switch)(struct mmci_host *host, struct mmc_ios *ios);
+ void (*enable_sdio_irq)(struct mmci_host *host, int enable);
+ void (*sdio_irq)(struct mmci_host *host, u32 status);
};

struct mmci_host {
diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c
index 60bca78a7..400e84a93 100644
--- a/drivers/mmc/host/mmci_stm32_sdmmc.c
+++ b/drivers/mmc/host/mmci_stm32_sdmmc.c
@@ -559,6 +559,25 @@ static int sdmmc_post_sig_volt_switch(struct mmci_host *host,
return ret;
}

+static void sdmmc_enable_sdio_irq(struct mmci_host *host, int enable)
+{
+ void __iomem *base = host->base;
+ u32 mask = readl_relaxed(base + MMCIMASK0);
+
+ if (enable)
+ writel_relaxed(mask | MCI_ST_SDIOITMASK, base + MMCIMASK0);
+ else
+ writel_relaxed(mask & ~MCI_ST_SDIOITMASK, base + MMCIMASK0);
+}
+
+static void sdmmc_sdio_irq(struct mmci_host *host, u32 status)
+{
+ if (status & MCI_ST_SDIOIT) {
+ sdmmc_enable_sdio_irq(host, 0);
+ sdio_signal_irq(host->mmc);
+ }
+}
+
static struct mmci_host_ops sdmmc_variant_ops = {
.validate_data = sdmmc_idma_validate_data,
.prep_data = sdmmc_idma_prep_data,
@@ -572,6 +591,8 @@ static struct mmci_host_ops sdmmc_variant_ops = {
.busy_complete = sdmmc_busy_complete,
.pre_sig_volt_switch = sdmmc_pre_sig_volt_vswitch,
.post_sig_volt_switch = sdmmc_post_sig_volt_switch,
+ .enable_sdio_irq = sdmmc_enable_sdio_irq,
+ .sdio_irq = sdmmc_sdio_irq,
};

void sdmmc_variant_init(struct mmci_host *host)

View solution in original post

6 REPLIES 6
Gregory PLANCHON
ST Employee

Hello @Tomáš Juřena,

First of all, thanks for your investigation.

We are currently working on it and will come back to you as soon as we have more information.

Regards,

Grégory

In order to give better visibility on the answered topics, please click on 'Accept as Solution' on the reply which solved your issue or answered your question.

Hi @Gregory PLANCHON​ ,

thanks for your interest in this topic. Please let me know if you need anything from me,

Best regards,

Tomas

Gregory PLANCHON
ST Employee

Hello,

We have found the cause of this problem: to get better performance, the following lines must be added to the device tree:

 brcmf: bcrmf@1 {

          reg = <1>;

          compatible = "brcm,bcm4329-fmac";

+       interrupt-parent = <&gpiod>;

+       interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; /* WL_HOST_WAKE */

+       interrupt-names = "host-wake";

      };

If you have any other questions, don't hesitate to ask me.

Regards,

Grégory

In order to give better visibility on the answered topics, please click on 'Accept as Solution' on the reply which solved your issue or answered your question.

Hi Grégory,

thank you for your solution. I have tested it with the DK2 board and the latency is indeed improved. Can you please explain it to me in more detail? If I understand correctly, the issue was the power management of the CPU, which was not being notified by an interrupt. Do you think it will also solve our issues with the JODY-W2?

I have checked the source code of the moal driver, and it looks like the interrupt is not generated if the CPU is not an iMX. Also, the driver requires an nxp,wifi-wake-host, which I can't find, so I think I will contact NXP support.

Regards,

Tomáš

Hi,
with the recent changes in the 6.1 ST kernel branch that enable the SDIO interrupt, I have managed to resolve the latency issue for the 8987 chipset used in the JODY-W2 module.

If anyone is facing latency issues, you can try applying the attached patch, which backports the new SDIO interrupt functionality to the ST kernel version 5.15, and adding the cap-sdio-irq property to the DTS.

My thanks to the kernel team for implementing this. Do you plan to backport it to the 5.15 kernel as well?

Best regards,

Tomáš.

p.s.: Since I cannot upload the patch as a file, I add it as text.

From 82ee7ffbaa937a7a3edc8ca95aa93bff7ab7d097 Mon Sep 17 00:00:00 2001
From: Tomas Jurena <tjurena@techniserv.cz>
Date: Thu, 29 Jun 2023 05:03:24 +0000
Subject: [PATCH] mmc: mmci: stm32: Backport changes from kernel 6.1

---
drivers/mmc/host/mmci.c | 67 ++++++++++++++++++++++++++++-
drivers/mmc/host/mmci.h | 4 ++
drivers/mmc/host/mmci_stm32_sdmmc.c | 21 +++++++++
3 files changed, 90 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 3d416c4ed..05cfc28a5 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -270,6 +270,7 @@ static struct variant_data variant_stm32_sdmmc = {
.datactrl_any_blocksz = true,
.datactrl_mask_sdio = MCI_DPSM_ST_SDIOEN,
.stm32_idmabsize_mask = GENMASK(12, 5),
+ .use_sdio_irq = true,
.busy_timeout = true,
.busy_detect = true,
.busy_detect_flag = MCI_STM32_BUSYD0,
@@ -296,6 +297,7 @@ static struct variant_data variant_stm32_sdmmcv2 = {
.datactrl_any_blocksz = true,
.datactrl_mask_sdio = MCI_DPSM_ST_SDIOEN,
.stm32_idmabsize_mask = GENMASK(16, 5),
+ .use_sdio_irq = true,
.dma_lli = true,
.busy_timeout = true,
.busy_detect = true,
@@ -392,6 +394,10 @@ static void mmci_write_datactrlreg(struct mmci_host *host, u32 datactrl)
/* Keep busy mode in DPSM if enabled */
datactrl |= host->datactrl_reg & host->variant->busy_dpsm_flag;

+ /* Keep SD I/O interrupt mode enabled */
+ if (host->variant->use_sdio_irq && host->mmc->caps & MMC_CAP_SDIO_IRQ)
+ datactrl |= host->variant->datactrl_mask_sdio;
+
if (host->datactrl_reg != datactrl) {
host->datactrl_reg = datactrl;
writel(datactrl, host->base + MMCIDATACTRL);
@@ -1644,6 +1650,11 @@ static irqreturn_t mmci_irq(int irq, void *dev_id)
mmci_data_irq(host, host->data, status);
}

+ if (host->variant->use_sdio_irq &&
+ host->mmc->caps & MMC_CAP_SDIO_IRQ &&
+ host->ops && host->ops->sdio_irq)
+ host->ops->sdio_irq(host, status);
+
/*
* Busy detection has been handled by mmci_cmd_irq() above.
* Clear the status bit to prevent polling in IRQ context.
@@ -1729,7 +1740,8 @@ static void mmci_set_max_busy_timeout(struct mmc_host *mmc)
return;

if (host->variant->busy_timeout && mmc->actual_clock)
- max_busy_timeout = ~0UL / (mmc->actual_clock / MSEC_PER_SEC);
+ max_busy_timeout = U32_MAX / DIV_ROUND_UP(mmc->actual_clock,
+ MSEC_PER_SEC);

mmc->max_busy_timeout = max_busy_timeout;
}
@@ -1883,6 +1895,45 @@ static int mmci_sig_volt_switch(struct mmc_host *mmc, struct mmc_ios *ios)
return ret;
}

+static void mmci_enable_sdio_irq(struct mmc_host *mmc, int enable)
+{
+ struct mmci_host *host = mmc_priv(mmc);
+ unsigned long flags;
+
+ if (!host->variant->use_sdio_irq)
+ return;
+
+ if (host->ops && host->ops->enable_sdio_irq) {
+ if (enable)
+ /* Keep device active while SDIO IRQ is enabled */
+ pm_runtime_get_sync(mmc_dev(mmc));
+
+ spin_lock_irqsave(&host->lock, flags);
+ host->ops->enable_sdio_irq(host, enable);
+ spin_unlock_irqrestore(&host->lock, flags);
+
+ if (!enable) {
+ pm_runtime_mark_last_busy(mmc_dev(mmc));
+ pm_runtime_put_autosuspend(mmc_dev(mmc));
+ }
+ }
+}
+
+static void mmci_ack_sdio_irq(struct mmc_host *mmc)
+{
+ struct mmci_host *host = mmc_priv(mmc);
+ unsigned long flags;
+
+ if (!host->variant->use_sdio_irq)
+ return;
+
+ if (host->ops && host->ops->enable_sdio_irq) {
+ spin_lock_irqsave(&host->lock, flags);
+ host->ops->enable_sdio_irq(host, 1);
+ spin_unlock_irqrestore(&host->lock, flags);
+ }
+}
+
static struct mmc_host_ops mmci_ops = {
.request = mmci_request,
.pre_req = mmci_pre_request,
@@ -1891,6 +1942,8 @@ static struct mmc_host_ops mmci_ops = {
.get_ro = mmc_gpio_get_ro,
.get_cd = mmci_get_cd,
.start_signal_voltage_switch = mmci_sig_volt_switch,
+ .enable_sdio_irq = mmci_enable_sdio_irq,
+ .ack_sdio_irq = mmci_ack_sdio_irq,
};

static void mmci_probe_level_translator(struct mmc_host *mmc)
@@ -2158,6 +2211,14 @@ static int mmci_probe(struct amba_device *dev,
mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY;
}

+ if (variant->use_sdio_irq && host->mmc->caps & MMC_CAP_SDIO_IRQ) {
+ mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD;
+
+ if (variant->datactrl_mask_sdio)
+ mmci_write_datactrlreg(host,
+ host->variant->datactrl_mask_sdio);
+ }
+
/* Variants with mandatory busy timeout in HW needs R1B responses. */
if (variant->busy_timeout)
mmc->caps |= MMC_CAP_NEED_RSP_BUSY;
@@ -2254,7 +2315,9 @@ static int mmci_probe(struct amba_device *dev,
pm_runtime_set_autosuspend_delay(&dev->dev, 50);
pm_runtime_use_autosuspend(&dev->dev);

- mmc_add_host(mmc);
+ ret = mmc_add_host(mmc);
+ if (ret)
+ goto clk_disable;

pm_runtime_put(&dev->dev);
return 0;
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index e1a9b96a3..a710cd686 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -316,6 +316,7 @@ struct mmci_host;
* @opendrain: bitmask identifying the OPENDRAIN bit inside MMCIPOWER register
* @dma_lli: true if variant has dma link list feature.
* @stm32_idmabsize_mask: stm32 sdmmc idma buffer size.
+ * @use_sdio_irq: allow SD I/O card to interrupt the host
*/
struct variant_data {
unsigned int clkreg;
@@ -360,6 +361,7 @@ struct variant_data {
u32 start_err;
u32 opendrain;
u8 dma_lli:1;
+ u8 use_sdio_irq:1;
u32 stm32_idmabsize_mask;
void (*init)(struct mmci_host *host);
};
@@ -383,6 +385,8 @@ struct mmci_host_ops {
bool (*busy_complete)(struct mmci_host *host, u32 status, u32 err_msk);
void (*pre_sig_volt_switch)(struct mmci_host *host);
int (*post_sig_volt_switch)(struct mmci_host *host, struct mmc_ios *ios);
+ void (*enable_sdio_irq)(struct mmci_host *host, int enable);
+ void (*sdio_irq)(struct mmci_host *host, u32 status);
};

struct mmci_host {
diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c
index 60bca78a7..400e84a93 100644
--- a/drivers/mmc/host/mmci_stm32_sdmmc.c
+++ b/drivers/mmc/host/mmci_stm32_sdmmc.c
@@ -559,6 +559,25 @@ static int sdmmc_post_sig_volt_switch(struct mmci_host *host,
return ret;
}

+static void sdmmc_enable_sdio_irq(struct mmci_host *host, int enable)
+{
+ void __iomem *base = host->base;
+ u32 mask = readl_relaxed(base + MMCIMASK0);
+
+ if (enable)
+ writel_relaxed(mask | MCI_ST_SDIOITMASK, base + MMCIMASK0);
+ else
+ writel_relaxed(mask & ~MCI_ST_SDIOITMASK, base + MMCIMASK0);
+}
+
+static void sdmmc_sdio_irq(struct mmci_host *host, u32 status)
+{
+ if (status & MCI_ST_SDIOIT) {
+ sdmmc_enable_sdio_irq(host, 0);
+ sdio_signal_irq(host->mmc);
+ }
+}
+
static struct mmci_host_ops sdmmc_variant_ops = {
.validate_data = sdmmc_idma_validate_data,
.prep_data = sdmmc_idma_prep_data,
@@ -572,6 +591,8 @@ static struct mmci_host_ops sdmmc_variant_ops = {
.busy_complete = sdmmc_busy_complete,
.pre_sig_volt_switch = sdmmc_pre_sig_volt_vswitch,
.post_sig_volt_switch = sdmmc_post_sig_volt_switch,
+ .enable_sdio_irq = sdmmc_enable_sdio_irq,
+ .sdio_irq = sdmmc_sdio_irq,
};

void sdmmc_variant_init(struct mmci_host *host)

Hello @Tomáš Juřena,
Indeed, the SDIO in-band interrupt feature has been added to our kernel by the development team. Thank you for your thorough feedback and for sharing this with the community.

This feature is planned to be back-ported on the next OSTL-4.1, on kernel 5.15.

Kind regards,
Erwan.

In order to give better visibility on the answered topics, please click on 'Accept as Solution' on the reply which solved your issue or answered your question.