From eaebf4c3b103dfc3c00dd3b6d3966fb9e08777ce Mon Sep 17 00:00:00 2001 From: Shukun Tan Date: Mon, 20 Jan 2020 15:30:06 +0800 Subject: [PATCH 001/162] crypto: hisilicon - Unify hardware error init/uninit into QM The initialization and uninitialization of zip/hpre/sec/qm hardware errors are handled in the respective drivers; this can be unified into qm.c. We add struct hisi_qm_err_ini to struct hisi_qm, which holds all of a device's error handlers; each driver assigns it during probe. Signed-off-by: Shukun Tan Signed-off-by: Zaibo Xu Reviewed-by: Zhou Wang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 33 +++++---- drivers/crypto/hisilicon/qm.c | 86 +++++++++++++++++------ drivers/crypto/hisilicon/qm.h | 20 +++++- drivers/crypto/hisilicon/sec2/sec_main.c | 51 ++++++-------- drivers/crypto/hisilicon/zip/zip_main.c | 58 +++++++-------- 5 files changed, 149 insertions(+), 99 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 401747de67a8..5bf274c0da85 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -349,18 +349,14 @@ static void hpre_cnt_regs_clear(struct hisi_qm *qm) hisi_qm_debug_regs_clear(qm); } -static void hpre_hw_error_disable(struct hpre *hpre) +static void hpre_hw_error_disable(struct hisi_qm *qm) { - struct hisi_qm *qm = &hpre->qm; - /* disable hpre hw error interrupts */ writel(HPRE_CORE_INT_DISABLE, qm->io_base + HPRE_INT_MASK); } -static void hpre_hw_error_enable(struct hpre *hpre) +static void hpre_hw_error_enable(struct hisi_qm *qm) { - struct hisi_qm *qm = &hpre->qm; - /* enable hpre hw error interrupts */ writel(HPRE_CORE_INT_ENABLE, qm->io_base + HPRE_INT_MASK); writel(HPRE_HAC_RAS_CE_ENABLE, qm->io_base + HPRE_RAS_CE_ENB); @@ -713,12 +709,16 @@ static int hpre_qm_pre_init(struct hisi_qm *qm, struct pci_dev *pdev) return 0; } -static void hpre_hw_err_init(struct hpre *hpre) -{ - hisi_qm_hw_error_init(&hpre->qm, QM_BASE_CE, QM_BASE_NFE, - 0, QM_DB_RANDOM_INVALID); - hpre_hw_error_enable(hpre); -} +static const struct hisi_qm_err_ini hpre_err_ini = { + .hw_err_enable = hpre_hw_error_enable, + .hw_err_disable = hpre_hw_error_disable, + .err_info = { + .ce = QM_BASE_CE, + .nfe = QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT, + .fe = 0, + .msi = QM_DB_RANDOM_INVALID, + } +}; static int hpre_pf_probe_init(struct hpre *hpre) { @@ -731,7 +731,8 @@ static int hpre_pf_probe_init(struct hpre *hpre) if (ret) return ret; - hpre_hw_err_init(hpre); + qm->err_ini = &hpre_err_ini; + hisi_qm_dev_err_init(qm); return 0; } @@ -790,8 +791,7 @@ err_with_qm_start: hisi_qm_stop(qm); err_with_err_init: - if (pdev->is_physfn) - hpre_hw_error_disable(hpre); + hisi_qm_dev_err_uninit(qm); err_with_qm_init: hisi_qm_uninit(qm); @@ -922,8 +922,7 @@ static void hpre_remove(struct pci_dev *pdev) hpre_debugfs_exit(hpre); hisi_qm_stop(qm); - if (qm->fun_type == QM_HW_PF) - hpre_hw_error_disable(hpre); + hisi_qm_dev_err_uninit(qm); hisi_qm_uninit(qm); } diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index b57da5ef8b5b..e1d08e896505 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -277,6 +277,7 @@ struct hisi_qm_hw_ops { int (*debug_init)(struct hisi_qm *qm); void (*hw_error_init)(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe, u32 msi); + void (*hw_error_uninit)(struct hisi_qm *qm); pci_ers_result_t (*hw_error_handle)(struct hisi_qm *qm); }; @@ -1011,6 +1012,11 @@ static void qm_hw_error_init_v2(struct
hisi_qm *qm, u32 ce, u32 nfe, u32 fe, writel(irq_unmask, qm->io_base + QM_ABNORMAL_INT_MASK); } +static void qm_hw_error_uninit_v2(struct hisi_qm *qm) +{ + writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK); +} + static void qm_log_hw_error(struct hisi_qm *qm, u32 error_status) { const struct hisi_qm_hw_error *err = qm_hw_error; @@ -1082,6 +1088,7 @@ static const struct hisi_qm_hw_ops qm_hw_ops_v2 = { .qm_db = qm_db_v2, .get_irq_num = qm_get_irq_num_v2, .hw_error_init = qm_hw_error_init_v2, + .hw_error_uninit = qm_hw_error_uninit_v2, .hw_error_handle = qm_hw_error_handle_v2, }; @@ -1856,35 +1863,28 @@ void hisi_qm_debug_regs_clear(struct hisi_qm *qm) } EXPORT_SYMBOL_GPL(hisi_qm_debug_regs_clear); -/** - * hisi_qm_hw_error_init() - Configure qm hardware error report method. - * @qm: The qm which we want to configure. - * @ce: Bit mask of correctable error configure. - * @nfe: Bit mask of non-fatal error configure. - * @fe: Bit mask of fatal error configure. - * @msi: Bit mask of error reported by message signal interrupt. - * - * Hardware errors of qm can be reported either by RAS interrupts which will - * be handled by UEFI and then PCIe AER or by device MSI. User can configure - * each error to use either of above two methods. For RAS interrupts, we can - * configure an error as one of correctable error, non-fatal error or - * fatal error. - * - * Bits indicating errors can be configured to ce, nfe, fe and msi to enable - * related report methods. Error report will be masked if related error bit - * does not configure. - */ -void hisi_qm_hw_error_init(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe, - u32 msi) +static void qm_hw_error_init(struct hisi_qm *qm) { + const struct hisi_qm_err_info *err_info = &qm->err_ini->err_info; + if (!qm->ops->hw_error_init) { dev_err(&qm->pdev->dev, "QM doesn't support hw error handling!\n"); return; } - qm->ops->hw_error_init(qm, ce, nfe, fe, msi); + qm->ops->hw_error_init(qm, err_info->ce, err_info->nfe, + err_info->fe, err_info->msi); +} + +static void qm_hw_error_uninit(struct hisi_qm *qm) +{ + if (!qm->ops->hw_error_uninit) { + dev_err(&qm->pdev->dev, "Unexpected QM hw error uninit!\n"); + return; + } + + qm->ops->hw_error_uninit(qm); } -EXPORT_SYMBOL_GPL(hisi_qm_hw_error_init); /** * hisi_qm_hw_error_handle() - Handle qm non-fatal hardware errors. @@ -1922,6 +1922,48 @@ enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev) } EXPORT_SYMBOL_GPL(hisi_qm_get_hw_version); +/** + * hisi_qm_dev_err_init() - Initialize device error configuration. + * @qm: The qm for which we want to do error initialization. + * + * Initialize QM and device error related configuration. + */ +void hisi_qm_dev_err_init(struct hisi_qm *qm) +{ + if (qm->fun_type == QM_HW_VF) + return; + + qm_hw_error_init(qm); + + if (!qm->err_ini->hw_err_enable) { + dev_err(&qm->pdev->dev, "Device doesn't support hw error init!\n"); + return; + } + qm->err_ini->hw_err_enable(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_dev_err_init); + +/** + * hisi_qm_dev_err_uninit() - Uninitialize device error configuration. + * @qm: The qm for which we want to do error uninitialization. + * + * Uninitialize QM and device error related configuration. 
+ */ +void hisi_qm_dev_err_uninit(struct hisi_qm *qm) +{ + if (qm->fun_type == QM_HW_VF) + return; + + qm_hw_error_uninit(qm); + + if (!qm->err_ini->hw_err_disable) { + dev_err(&qm->pdev->dev, "Unexpected device hw error uninit!\n"); + return; + } + qm->err_ini->hw_err_disable(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_dev_err_uninit); + MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Zhou Wang "); MODULE_DESCRIPTION("HiSilicon Accelerator queue manager driver"); diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index 078b8f1f1b77..325f6d5d7991 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -125,6 +125,21 @@ struct hisi_qm_status { unsigned long flags; }; +struct hisi_qm; + +struct hisi_qm_err_info { + u32 ce; + u32 nfe; + u32 fe; + u32 msi; +}; + +struct hisi_qm_err_ini { + void (*hw_err_enable)(struct hisi_qm *qm); + void (*hw_err_disable)(struct hisi_qm *qm); + struct hisi_qm_err_info err_info; +}; + struct hisi_qm { enum qm_hw_ver ver; enum qm_fun_type fun_type; @@ -148,6 +163,7 @@ struct hisi_qm { dma_addr_t aeqe_dma; struct hisi_qm_status status; + const struct hisi_qm_err_ini *err_ini; rwlock_t qps_lock; unsigned long *qp_bitmap; @@ -211,11 +227,11 @@ int hisi_qm_get_free_qp_num(struct hisi_qm *qm); int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number); int hisi_qm_set_vft(struct hisi_qm *qm, u32 fun_num, u32 base, u32 number); int hisi_qm_debug_init(struct hisi_qm *qm); -void hisi_qm_hw_error_init(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe, - u32 msi); pci_ers_result_t hisi_qm_hw_error_handle(struct hisi_qm *qm); enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev); void hisi_qm_debug_regs_clear(struct hisi_qm *qm); +void hisi_qm_dev_err_init(struct hisi_qm *qm); +void hisi_qm_dev_err_uninit(struct hisi_qm *qm); struct hisi_acc_sgl_pool; struct hisi_acc_hw_sgl *hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 2bbaf1e2dae7..3cb5fd3f0e4f 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -384,9 +384,8 @@ static void sec_debug_regs_clear(struct hisi_qm *qm) hisi_qm_debug_regs_clear(qm); } -static void sec_hw_error_enable(struct sec_dev *sec) +static void sec_hw_error_enable(struct hisi_qm *qm) { - struct hisi_qm *qm = &sec->qm; u32 val; if (qm->ver == QM_HW_V1) { @@ -414,9 +413,8 @@ static void sec_hw_error_enable(struct sec_dev *sec) writel(val, qm->io_base + SEC_CONTROL_REG); } -static void sec_hw_error_disable(struct sec_dev *sec) +static void sec_hw_error_disable(struct hisi_qm *qm) { - struct hisi_qm *qm = &sec->qm; u32 val; val = readl(qm->io_base + SEC_CONTROL_REG); @@ -435,27 +433,6 @@ static void sec_hw_error_disable(struct sec_dev *sec) writel(val, qm->io_base + SEC_CONTROL_REG); } -static void sec_hw_error_init(struct sec_dev *sec) -{ - if (sec->qm.fun_type == QM_HW_VF) - return; - - hisi_qm_hw_error_init(&sec->qm, QM_BASE_CE, - QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT - | QM_ACC_WB_NOT_READY_TIMEOUT, 0, - QM_DB_RANDOM_INVALID); - sec_hw_error_enable(sec); -} - -static void sec_hw_error_uninit(struct sec_dev *sec) -{ - if (sec->qm.fun_type == QM_HW_VF) - return; - - sec_hw_error_disable(sec); - writel(GENMASK(12, 0), sec->qm.io_base + SEC_QM_ABNORMAL_INT_MASK); -} - static u32 sec_current_qm_read(struct sec_debug_file *file) { struct hisi_qm *qm = file->qm; @@ -695,6 +672,18 @@ static void sec_debugfs_exit(struct sec_dev *sec) 
debugfs_remove_recursive(sec->qm.debug.debug_root); } +static const struct hisi_qm_err_ini sec_err_ini = { + .hw_err_enable = sec_hw_error_enable, + .hw_err_disable = sec_hw_error_disable, + .err_info = { + .ce = QM_BASE_CE, + .nfe = QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT | + QM_ACC_WB_NOT_READY_TIMEOUT, + .fe = 0, + .msi = QM_DB_RANDOM_INVALID, + } +}; + static int sec_pf_probe_init(struct sec_dev *sec) { struct hisi_qm *qm = &sec->qm; @@ -713,11 +702,13 @@ static int sec_pf_probe_init(struct sec_dev *sec) return -EINVAL; } + qm->err_ini = &sec_err_ini; + ret = sec_set_user_domain_and_cache(sec); if (ret) return ret; - sec_hw_error_init(sec); + hisi_qm_dev_err_init(qm); sec_debug_regs_clear(qm); return 0; @@ -777,9 +768,9 @@ static int sec_probe_init(struct hisi_qm *qm, struct sec_dev *sec) return 0; } -static void sec_probe_uninit(struct sec_dev *sec) +static void sec_probe_uninit(struct hisi_qm *qm) { - sec_hw_error_uninit(sec); + hisi_qm_dev_err_uninit(qm); } static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) @@ -836,7 +827,7 @@ err_remove_from_list: hisi_qm_stop(qm); err_probe_uninit: - sec_probe_uninit(sec); + sec_probe_uninit(qm); err_qm_uninit: sec_qm_uninit(qm); @@ -967,7 +958,7 @@ static void sec_remove(struct pci_dev *pdev) if (qm->fun_type == QM_HW_PF) sec_debug_regs_clear(qm); - sec_probe_uninit(sec); + sec_probe_uninit(qm); sec_qm_uninit(qm); } diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index e1bab1a91333..4f60b93c853a 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -60,13 +60,13 @@ #define HZIP_CORE_DEBUG_DECOMP_5 0x309000 #define HZIP_CORE_INT_SOURCE 0x3010A0 -#define HZIP_CORE_INT_MASK 0x3010A4 +#define HZIP_CORE_INT_MASK_REG 0x3010A4 #define HZIP_CORE_INT_STATUS 0x3010AC #define HZIP_CORE_INT_STATUS_M_ECC BIT(1) #define HZIP_CORE_SRAM_ECC_ERR_INFO 0x301148 #define SRAM_ECC_ERR_NUM_SHIFT 16 #define SRAM_ECC_ERR_ADDR_SHIFT 24 -#define HZIP_CORE_INT_DISABLE 0x000007FF +#define HZIP_CORE_INT_MASK_ALL GENMASK(10, 0) #define HZIP_COMP_CORE_NUM 2 #define HZIP_DECOMP_CORE_NUM 6 #define HZIP_CORE_NUM (HZIP_COMP_CORE_NUM + \ @@ -366,27 +366,26 @@ static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip) FIELD_PREP(CQC_CACHE_WB_THRD, 1), base + QM_CACHE_CTL); } -static void hisi_zip_hw_error_set_state(struct hisi_zip *hisi_zip, bool state) +static void hisi_zip_hw_error_enable(struct hisi_qm *qm) { - struct hisi_qm *qm = &hisi_zip->qm; - if (qm->ver == QM_HW_V1) { - writel(HZIP_CORE_INT_DISABLE, qm->io_base + HZIP_CORE_INT_MASK); + writel(HZIP_CORE_INT_MASK_ALL, + qm->io_base + HZIP_CORE_INT_MASK_REG); dev_info(&qm->pdev->dev, "Does not support hw error handle\n"); return; } - if (state) { - /* clear ZIP hw error source if having */ - writel(HZIP_CORE_INT_DISABLE, hisi_zip->qm.io_base + - HZIP_CORE_INT_SOURCE); - /* enable ZIP hw error interrupts */ - writel(0, hisi_zip->qm.io_base + HZIP_CORE_INT_MASK); - } else { - /* disable ZIP hw error interrupts */ - writel(HZIP_CORE_INT_DISABLE, - hisi_zip->qm.io_base + HZIP_CORE_INT_MASK); - } + /* clear ZIP hw error source if having */ + writel(HZIP_CORE_INT_MASK_ALL, qm->io_base + HZIP_CORE_INT_SOURCE); + + /* enable ZIP hw error interrupts */ + writel(0, qm->io_base + HZIP_CORE_INT_MASK_REG); +} + +static void hisi_zip_hw_error_disable(struct hisi_qm *qm) +{ + /* disable ZIP hw error interrupts */ + writel(HZIP_CORE_INT_MASK_ALL, qm->io_base + HZIP_CORE_INT_MASK_REG); } static inline struct 
hisi_qm *file_to_qm(struct ctrl_debug_file *file) @@ -638,13 +637,16 @@ static void hisi_zip_debugfs_exit(struct hisi_zip *hisi_zip) hisi_zip_debug_regs_clear(hisi_zip); } -static void hisi_zip_hw_error_init(struct hisi_zip *hisi_zip) -{ - hisi_qm_hw_error_init(&hisi_zip->qm, QM_BASE_CE, - QM_BASE_NFE | QM_ACC_WB_NOT_READY_TIMEOUT, 0, - QM_DB_RANDOM_INVALID); - hisi_zip_hw_error_set_state(hisi_zip, true); -} +static const struct hisi_qm_err_ini hisi_zip_err_ini = { + .hw_err_enable = hisi_zip_hw_error_enable, + .hw_err_disable = hisi_zip_hw_error_disable, + .err_info = { + .ce = QM_BASE_CE, + .nfe = QM_BASE_NFE | QM_ACC_WB_NOT_READY_TIMEOUT, + .fe = 0, + .msi = QM_DB_RANDOM_INVALID, + } +}; static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip) { @@ -671,8 +673,10 @@ return -EINVAL; } + qm->err_ini = &hisi_zip_err_ini; + hisi_zip_set_user_domain_and_cache(hisi_zip); - hisi_zip_hw_error_init(hisi_zip); + hisi_qm_dev_err_init(qm); hisi_zip_debug_regs_clear(hisi_zip); return 0; @@ -887,9 +891,7 @@ static void hisi_zip_remove(struct pci_dev *pdev) hisi_zip_debugfs_exit(hisi_zip); hisi_qm_stop(qm); - if (qm->fun_type == QM_HW_PF) - hisi_zip_hw_error_set_state(hisi_zip, false); - + hisi_qm_dev_err_uninit(qm); hisi_qm_uninit(qm); hisi_zip_remove_from_list(hisi_zip); } From de3daf4b4ad58d3c4110f437f4b2f3fc631a2d3a Mon Sep 17 00:00:00 2001 From: Shukun Tan Date: Mon, 20 Jan 2020 15:30:07 +0800 Subject: [PATCH 002/162] crypto: hisilicon - Configure zip RAS error type Configure the zip RAS error type during error handling initialization, where the ECC 1-bit error is configured as a CE error and the others as NFE. Signed-off-by: Shukun Tan Reviewed-by: Zhou Wang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/zip/zip_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 4f60b93c853a..ec2408e6a5cd 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -64,6 +64,10 @@ #define HZIP_CORE_INT_STATUS 0x3010AC #define HZIP_CORE_INT_STATUS_M_ECC BIT(1) #define HZIP_CORE_SRAM_ECC_ERR_INFO 0x301148 +#define HZIP_CORE_INT_RAS_CE_ENB 0x301160 +#define HZIP_CORE_INT_RAS_NFE_ENB 0x301164 +#define HZIP_CORE_INT_RAS_FE_ENB 0x301168 +#define HZIP_CORE_INT_RAS_NFE_ENABLE 0x7FE #define SRAM_ECC_ERR_NUM_SHIFT 16 #define SRAM_ECC_ERR_ADDR_SHIFT 24 #define HZIP_CORE_INT_MASK_ALL GENMASK(10, 0) @@ -378,6 +382,12 @@ static void hisi_zip_hw_error_enable(struct hisi_qm *qm) /* clear ZIP hw error source if having */ writel(HZIP_CORE_INT_MASK_ALL, qm->io_base + HZIP_CORE_INT_SOURCE); + /* configure error type */ + writel(0x1, qm->io_base + HZIP_CORE_INT_RAS_CE_ENB); + writel(0x0, qm->io_base + HZIP_CORE_INT_RAS_FE_ENB); + writel(HZIP_CORE_INT_RAS_NFE_ENABLE, + qm->io_base + HZIP_CORE_INT_RAS_NFE_ENB); + /* enable ZIP hw error interrupts */ writel(0, qm->io_base + HZIP_CORE_INT_MASK_REG); } From f826e6efb42b9eb0097d0969310a407aa039be08 Mon Sep 17 00:00:00 2001 From: Shukun Tan Date: Mon, 20 Jan 2020 15:30:08 +0800 Subject: [PATCH 003/162] crypto: hisilicon - Unify error detect process into qm In the error detection process there is a lot of duplicate code that can be moved into qm.c. We add two callbacks (get_dev_hw_err_status and log_dev_hw_err) to struct hisi_qm_err_ini to handle device error detection; the qm error detection itself is unchanged.
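As a sketch of the resulting split (an illustration only, not part of the patch: "foo" stands for any of the hpre/sec/zip drivers, and the FOO_* registers are placeholders for that device's own error registers), a driver now only supplies its callbacks against the qm.h interface this series defines, and qm.c drives the common read-status/log/clear flow:

    static u32 foo_get_hw_err_status(struct hisi_qm *qm)
    {
        /* read the device's raw error-status register */
        return readl(qm->io_base + FOO_CORE_INT_STATUS);
    }

    static void foo_log_hw_error(struct hisi_qm *qm, u32 err_sts)
    {
        dev_err(&qm->pdev->dev, "device error [status=0x%x]\n", err_sts);
        /* clear the reported error source */
        writel(err_sts, qm->io_base + FOO_CORE_INT_SOURCE);
    }

    static const struct hisi_qm_err_ini foo_err_ini = {
        /* the real drivers also fill in hw_err_enable/hw_err_disable
         * and err_info, as shown in the hunks below */
        .get_dev_hw_err_status = foo_get_hw_err_status,
        .log_dev_hw_err = foo_log_hw_error,
    };

    /* at probe time: */
    qm->err_ini = &foo_err_ini;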
Signed-off-by: Shukun Tan Signed-off-by: Zaibo Xu Reviewed-by: Zhou Wang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 93 +++++---------- drivers/crypto/hisilicon/qm.c | 71 ++++++++++-- drivers/crypto/hisilicon/qm.h | 5 +- drivers/crypto/hisilicon/sec2/sec_main.c | 127 +++++++-------------- drivers/crypto/hisilicon/zip/zip_main.c | 133 ++++++++-------------- 5 files changed, 185 insertions(+), 244 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 5bf274c0da85..0ba4a92d4618 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -709,14 +709,36 @@ static int hpre_qm_pre_init(struct hisi_qm *qm, struct pci_dev *pdev) return 0; } +static void hpre_log_hw_error(struct hisi_qm *qm, u32 err_sts) +{ + const struct hpre_hw_error *err = hpre_hw_errors; + struct device *dev = &qm->pdev->dev; + + while (err->msg) { + if (err->int_msk & err_sts) + dev_warn(dev, "%s [error status=0x%x] found\n", + err->msg, err->int_msk); + err++; + } + + writel(err_sts, qm->io_base + HPRE_HAC_SOURCE_INT); +} + +static u32 hpre_get_hw_err_status(struct hisi_qm *qm) +{ + return readl(qm->io_base + HPRE_HAC_INT_STATUS); +} + static const struct hisi_qm_err_ini hpre_err_ini = { - .hw_err_enable = hpre_hw_error_enable, - .hw_err_disable = hpre_hw_error_disable, - .err_info = { - .ce = QM_BASE_CE, - .nfe = QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT, - .fe = 0, - .msi = QM_DB_RANDOM_INVALID, + .hw_err_enable = hpre_hw_error_enable, + .hw_err_disable = hpre_hw_error_disable, + .get_dev_hw_err_status = hpre_get_hw_err_status, + .log_dev_hw_err = hpre_log_hw_error, + .err_info = { + .ce = QM_BASE_CE, + .nfe = QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT, + .fe = 0, + .msi = QM_DB_RANDOM_INVALID, } }; @@ -926,64 +948,9 @@ static void hpre_remove(struct pci_dev *pdev) hisi_qm_uninit(qm); } -static void hpre_log_hw_error(struct hpre *hpre, u32 err_sts) -{ - const struct hpre_hw_error *err = hpre_hw_errors; - struct device *dev = &hpre->qm.pdev->dev; - - while (err->msg) { - if (err->int_msk & err_sts) - dev_warn(dev, "%s [error status=0x%x] found\n", - err->msg, err->int_msk); - err++; - } -} - -static pci_ers_result_t hpre_hw_error_handle(struct hpre *hpre) -{ - u32 err_sts; - - /* read err sts */ - err_sts = readl(hpre->qm.io_base + HPRE_HAC_INT_STATUS); - if (err_sts) { - hpre_log_hw_error(hpre, err_sts); - - /* clear error interrupts */ - writel(err_sts, hpre->qm.io_base + HPRE_HAC_SOURCE_INT); - return PCI_ERS_RESULT_NEED_RESET; - } - - return PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t hpre_process_hw_error(struct pci_dev *pdev) -{ - struct hpre *hpre = pci_get_drvdata(pdev); - pci_ers_result_t qm_ret, hpre_ret; - - /* log qm error */ - qm_ret = hisi_qm_hw_error_handle(&hpre->qm); - - /* log hpre error */ - hpre_ret = hpre_hw_error_handle(hpre); - - return (qm_ret == PCI_ERS_RESULT_NEED_RESET || - hpre_ret == PCI_ERS_RESULT_NEED_RESET) ? 
- PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t hpre_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - pci_info(pdev, "PCI error detected, state(=%d)!!\n", state); - if (state == pci_channel_io_perm_failure) - return PCI_ERS_RESULT_DISCONNECT; - - return hpre_process_hw_error(pdev); -} static const struct pci_error_handlers hpre_err_handler = { - .error_detected = hpre_error_detected, + .error_detected = hisi_qm_dev_err_detected, }; static struct pci_driver hpre_pci_driver = { diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index e1d08e896505..2c0e22f3abe0 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -1886,13 +1886,7 @@ static void qm_hw_error_uninit(struct hisi_qm *qm) qm->ops->hw_error_uninit(qm); } -/** - * hisi_qm_hw_error_handle() - Handle qm non-fatal hardware errors. - * @qm: The qm which has non-fatal hardware errors. - * - * Accelerators use this function to handle qm non-fatal hardware errors. - */ -pci_ers_result_t hisi_qm_hw_error_handle(struct hisi_qm *qm) +static pci_ers_result_t qm_hw_error_handle(struct hisi_qm *qm) { if (!qm->ops->hw_error_handle) { dev_err(&qm->pdev->dev, "QM doesn't support hw error report!\n"); @@ -1901,7 +1895,6 @@ pci_ers_result_t hisi_qm_hw_error_handle(struct hisi_qm *qm) return qm->ops->hw_error_handle(qm); } -EXPORT_SYMBOL_GPL(hisi_qm_hw_error_handle); /** * hisi_qm_get_hw_version() - Get hardware version of a qm. @@ -1964,6 +1957,68 @@ void hisi_qm_dev_err_uninit(struct hisi_qm *qm) } EXPORT_SYMBOL_GPL(hisi_qm_dev_err_uninit); +static pci_ers_result_t qm_dev_err_handle(struct hisi_qm *qm) +{ + u32 err_sts; + + if (!qm->err_ini->get_dev_hw_err_status) { + dev_err(&qm->pdev->dev, "Device doesn't support get hw error status!\n"); + return PCI_ERS_RESULT_NONE; + } + + /* get device hardware error status */ + err_sts = qm->err_ini->get_dev_hw_err_status(qm); + if (err_sts) { + if (!qm->err_ini->log_dev_hw_err) { + dev_err(&qm->pdev->dev, "Device doesn't support log hw error!\n"); + return PCI_ERS_RESULT_NEED_RESET; + } + + qm->err_ini->log_dev_hw_err(qm, err_sts); + return PCI_ERS_RESULT_NEED_RESET; + } + + return PCI_ERS_RESULT_RECOVERED; +} + +static pci_ers_result_t qm_process_dev_error(struct pci_dev *pdev) +{ + struct hisi_qm *qm = pci_get_drvdata(pdev); + pci_ers_result_t qm_ret, dev_ret; + + /* log qm error */ + qm_ret = qm_hw_error_handle(qm); + + /* log device error */ + dev_ret = qm_dev_err_handle(qm); + + return (qm_ret == PCI_ERS_RESULT_NEED_RESET || + dev_ret == PCI_ERS_RESULT_NEED_RESET) ? + PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED; +} + +/** + * hisi_qm_dev_err_detected() - Get device and qm error status then log it. + * @pdev: The PCI device which need report error. + * @state: The connectivity between CPU and device. + * + * We register this function into PCIe AER handlers, It will report device or + * qm hardware error status when error occur. 
+ */ +pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + if (pdev->is_virtfn) + return PCI_ERS_RESULT_NONE; + + pci_info(pdev, "PCI error detected, state(=%d)!!\n", state); + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + + return qm_process_dev_error(pdev); +} +EXPORT_SYMBOL_GPL(hisi_qm_dev_err_detected); + MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Zhou Wang "); MODULE_DESCRIPTION("HiSilicon Accelerator queue manager driver"); diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index 325f6d5d7991..cae26eabd849 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -137,6 +137,8 @@ struct hisi_qm_err_info { struct hisi_qm_err_ini { void (*hw_err_enable)(struct hisi_qm *qm); void (*hw_err_disable)(struct hisi_qm *qm); + u32 (*get_dev_hw_err_status)(struct hisi_qm *qm); + void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts); struct hisi_qm_err_info err_info; }; @@ -227,11 +229,12 @@ int hisi_qm_get_free_qp_num(struct hisi_qm *qm); int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number); int hisi_qm_set_vft(struct hisi_qm *qm, u32 fun_num, u32 base, u32 number); int hisi_qm_debug_init(struct hisi_qm *qm); -pci_ers_result_t hisi_qm_hw_error_handle(struct hisi_qm *qm); enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev); void hisi_qm_debug_regs_clear(struct hisi_qm *qm); void hisi_qm_dev_err_init(struct hisi_qm *qm); void hisi_qm_dev_err_uninit(struct hisi_qm *qm); +pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev, + pci_channel_state_t state); struct hisi_acc_sgl_pool; struct hisi_acc_hw_sgl *hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 3cb5fd3f0e4f..3767fdb64d98 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -672,15 +672,48 @@ static void sec_debugfs_exit(struct sec_dev *sec) debugfs_remove_recursive(sec->qm.debug.debug_root); } +static void sec_log_hw_error(struct hisi_qm *qm, u32 err_sts) +{ + const struct sec_hw_error *errs = sec_hw_errors; + struct device *dev = &qm->pdev->dev; + u32 err_val; + + while (errs->msg) { + if (errs->int_msk & err_sts) { + dev_err(dev, "%s [error status=0x%x] found\n", + errs->msg, errs->int_msk); + + if (SEC_CORE_INT_STATUS_M_ECC & errs->int_msk) { + err_val = readl(qm->io_base + + SEC_CORE_SRAM_ECC_ERR_INFO); + dev_err(dev, "multi ecc sram num=0x%x\n", + SEC_ECC_NUM(err_val)); + dev_err(dev, "multi ecc sram addr=0x%x\n", + SEC_ECC_ADDR(err_val)); + } + } + errs++; + } + + writel(err_sts, qm->io_base + SEC_CORE_INT_SOURCE); +} + +static u32 sec_get_hw_err_status(struct hisi_qm *qm) +{ + return readl(qm->io_base + SEC_CORE_INT_STATUS); +} + static const struct hisi_qm_err_ini sec_err_ini = { - .hw_err_enable = sec_hw_error_enable, - .hw_err_disable = sec_hw_error_disable, - .err_info = { - .ce = QM_BASE_CE, - .nfe = QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT | - QM_ACC_WB_NOT_READY_TIMEOUT, - .fe = 0, - .msi = QM_DB_RANDOM_INVALID, + .hw_err_enable = sec_hw_error_enable, + .hw_err_disable = sec_hw_error_disable, + .get_dev_hw_err_status = sec_get_hw_err_status, + .log_dev_hw_err = sec_log_hw_error, + .err_info = { + .ce = QM_BASE_CE, + .nfe = QM_BASE_NFE | QM_ACC_DO_TASK_TIMEOUT | + QM_ACC_WB_NOT_READY_TIMEOUT, + .fe = 0, + .msi = QM_DB_RANDOM_INVALID, } }; @@ -963,84 +996,8 @@ static void sec_remove(struct pci_dev *pdev) sec_qm_uninit(qm); } 
-static void sec_log_hw_error(struct sec_dev *sec, u32 err_sts) -{ - const struct sec_hw_error *errs = sec_hw_errors; - struct device *dev = &sec->qm.pdev->dev; - u32 err_val; - - while (errs->msg) { - if (errs->int_msk & err_sts) { - dev_err(dev, "%s [error status=0x%x] found\n", - errs->msg, errs->int_msk); - - if (SEC_CORE_INT_STATUS_M_ECC & err_sts) { - err_val = readl(sec->qm.io_base + - SEC_CORE_SRAM_ECC_ERR_INFO); - dev_err(dev, "multi ecc sram num=0x%x\n", - SEC_ECC_NUM(err_val)); - dev_err(dev, "multi ecc sram addr=0x%x\n", - SEC_ECC_ADDR(err_val)); - } - } - errs++; - } -} - -static pci_ers_result_t sec_hw_error_handle(struct sec_dev *sec) -{ - u32 err_sts; - - /* read err sts */ - err_sts = readl(sec->qm.io_base + SEC_CORE_INT_STATUS); - if (err_sts) { - sec_log_hw_error(sec, err_sts); - - /* clear error interrupts */ - writel(err_sts, sec->qm.io_base + SEC_CORE_INT_SOURCE); - - return PCI_ERS_RESULT_NEED_RESET; - } - - return PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t sec_process_hw_error(struct pci_dev *pdev) -{ - struct sec_dev *sec = pci_get_drvdata(pdev); - pci_ers_result_t qm_ret, sec_ret; - - if (!sec) { - pci_err(pdev, "Can't recover error during device init\n"); - return PCI_ERS_RESULT_NONE; - } - - /* log qm error */ - qm_ret = hisi_qm_hw_error_handle(&sec->qm); - - /* log sec error */ - sec_ret = sec_hw_error_handle(sec); - - return (qm_ret == PCI_ERS_RESULT_NEED_RESET || - sec_ret == PCI_ERS_RESULT_NEED_RESET) ? - PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t sec_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - if (pdev->is_virtfn) - return PCI_ERS_RESULT_NONE; - - pci_info(pdev, "PCI error detected, state(=%d)!!\n", state); - if (state == pci_channel_io_perm_failure) - return PCI_ERS_RESULT_DISCONNECT; - - return sec_process_hw_error(pdev); -} - static const struct pci_error_handlers sec_err_handler = { - .error_detected = sec_error_detected, + .error_detected = hisi_qm_dev_err_detected, }; static struct pci_driver sec_pci_driver = { diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index ec2408e6a5cd..f4aec18a25b4 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -68,8 +68,8 @@ #define HZIP_CORE_INT_RAS_NFE_ENB 0x301164 #define HZIP_CORE_INT_RAS_FE_ENB 0x301168 #define HZIP_CORE_INT_RAS_NFE_ENABLE 0x7FE -#define SRAM_ECC_ERR_NUM_SHIFT 16 -#define SRAM_ECC_ERR_ADDR_SHIFT 24 +#define HZIP_SRAM_ECC_ERR_NUM_SHIFT 16 +#define HZIP_SRAM_ECC_ERR_ADDR_SHIFT 24 #define HZIP_CORE_INT_MASK_ALL GENMASK(10, 0) #define HZIP_COMP_CORE_NUM 2 #define HZIP_DECOMP_CORE_NUM 6 @@ -647,14 +647,50 @@ static void hisi_zip_debugfs_exit(struct hisi_zip *hisi_zip) hisi_zip_debug_regs_clear(hisi_zip); } +static void hisi_zip_log_hw_error(struct hisi_qm *qm, u32 err_sts) +{ + const struct hisi_zip_hw_error *err = zip_hw_error; + struct device *dev = &qm->pdev->dev; + u32 err_val; + + while (err->msg) { + if (err->int_msk & err_sts) { + dev_err(dev, "%s [error status=0x%x] found\n", + err->msg, err->int_msk); + + if (err->int_msk & HZIP_CORE_INT_STATUS_M_ECC) { + err_val = readl(qm->io_base + + HZIP_CORE_SRAM_ECC_ERR_INFO); + dev_err(dev, "hisi-zip multi ecc sram num=0x%x\n", + ((err_val >> + HZIP_SRAM_ECC_ERR_NUM_SHIFT) & 0xFF)); + dev_err(dev, "hisi-zip multi ecc sram addr=0x%x\n", + (err_val >> + HZIP_SRAM_ECC_ERR_ADDR_SHIFT)); + } + } + err++; + } + + writel(err_sts, qm->io_base + HZIP_CORE_INT_SOURCE); +} + +static u32 
hisi_zip_get_hw_err_status(struct hisi_qm *qm) +{ + return readl(qm->io_base + HZIP_CORE_INT_STATUS); +} + static const struct hisi_qm_err_ini hisi_zip_err_ini = { - .hw_err_enable = hisi_zip_hw_error_enable, - .hw_err_disable = hisi_zip_hw_error_disable, - .err_info = { - .ce = QM_BASE_CE, - .nfe = QM_BASE_NFE | QM_ACC_WB_NOT_READY_TIMEOUT, - .fe = 0, - .msi = QM_DB_RANDOM_INVALID, + .hw_err_enable = hisi_zip_hw_error_enable, + .hw_err_disable = hisi_zip_hw_error_disable, + .get_dev_hw_err_status = hisi_zip_get_hw_err_status, + .log_dev_hw_err = hisi_zip_log_hw_error, + .err_info = { + .ce = QM_BASE_CE, + .nfe = QM_BASE_NFE | + QM_ACC_WB_NOT_READY_TIMEOUT, + .fe = 0, + .msi = QM_DB_RANDOM_INVALID, } }; @@ -906,85 +942,8 @@ static void hisi_zip_remove(struct pci_dev *pdev) hisi_zip_remove_from_list(hisi_zip); } -static void hisi_zip_log_hw_error(struct hisi_zip *hisi_zip, u32 err_sts) -{ - const struct hisi_zip_hw_error *err = zip_hw_error; - struct device *dev = &hisi_zip->qm.pdev->dev; - u32 err_val; - - while (err->msg) { - if (err->int_msk & err_sts) { - dev_warn(dev, "%s [error status=0x%x] found\n", - err->msg, err->int_msk); - - if (HZIP_CORE_INT_STATUS_M_ECC & err->int_msk) { - err_val = readl(hisi_zip->qm.io_base + - HZIP_CORE_SRAM_ECC_ERR_INFO); - dev_warn(dev, "hisi-zip multi ecc sram num=0x%x\n", - ((err_val >> SRAM_ECC_ERR_NUM_SHIFT) & - 0xFF)); - dev_warn(dev, "hisi-zip multi ecc sram addr=0x%x\n", - (err_val >> SRAM_ECC_ERR_ADDR_SHIFT)); - } - } - err++; - } -} - -static pci_ers_result_t hisi_zip_hw_error_handle(struct hisi_zip *hisi_zip) -{ - u32 err_sts; - - /* read err sts */ - err_sts = readl(hisi_zip->qm.io_base + HZIP_CORE_INT_STATUS); - - if (err_sts) { - hisi_zip_log_hw_error(hisi_zip, err_sts); - /* clear error interrupts */ - writel(err_sts, hisi_zip->qm.io_base + HZIP_CORE_INT_SOURCE); - - return PCI_ERS_RESULT_NEED_RESET; - } - - return PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t hisi_zip_process_hw_error(struct pci_dev *pdev) -{ - struct hisi_zip *hisi_zip = pci_get_drvdata(pdev); - struct device *dev = &pdev->dev; - pci_ers_result_t qm_ret, zip_ret; - - if (!hisi_zip) { - dev_err(dev, - "Can't recover ZIP-error occurred during device init\n"); - return PCI_ERS_RESULT_NONE; - } - - qm_ret = hisi_qm_hw_error_handle(&hisi_zip->qm); - - zip_ret = hisi_zip_hw_error_handle(hisi_zip); - - return (qm_ret == PCI_ERS_RESULT_NEED_RESET || - zip_ret == PCI_ERS_RESULT_NEED_RESET) ? - PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t hisi_zip_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - if (pdev->is_virtfn) - return PCI_ERS_RESULT_NONE; - - dev_info(&pdev->dev, "PCI error detected, state(=%d)!!\n", state); - if (state == pci_channel_io_perm_failure) - return PCI_ERS_RESULT_DISCONNECT; - - return hisi_zip_process_hw_error(pdev); -} - static const struct pci_error_handlers hisi_zip_err_handler = { - .error_detected = hisi_zip_error_detected, + .error_detected = hisi_qm_dev_err_detected, }; static struct pci_driver hisi_zip_pci_driver = { From 00e62e868ccd230e92c6f08ef2877ede10292882 Mon Sep 17 00:00:00 2001 From: Shukun Tan Date: Mon, 20 Jan 2020 15:30:09 +0800 Subject: [PATCH 004/162] crypto: hisilicon - Fix duplicate print when multiple qm errors occur If all possible errors occur at the same time, error_status will be all 1s. The doorbell timeout error and FIFO overflow error would then be printed on each loop iteration, although they should be printed just once.
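A minimal before/after sketch of the change (the full hunk is below): the detail printout is keyed to the current table entry's own mask instead of to the raw status word, so each detail block can fire at most once per walk of qm_hw_error:

    /* before: with error_status all 1s, this was true on every iteration */
    if (error_status & QM_DB_TIMEOUT) { ... }

    /* after: true only when the current entry is the doorbell-timeout one */
    if (err->int_msk & QM_DB_TIMEOUT) { ... }

The same applies to the QM_OF_FIFO_OF detail block.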
Signed-off-by: Shukun Tan Reviewed-by: Zhou Wang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 53 +++++++++++++++++------------------ 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 2c0e22f3abe0..79f84dc63a43 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -1019,41 +1019,38 @@ static void qm_hw_error_uninit_v2(struct hisi_qm *qm) static void qm_log_hw_error(struct hisi_qm *qm, u32 error_status) { - const struct hisi_qm_hw_error *err = qm_hw_error; + const struct hisi_qm_hw_error *err; struct device *dev = &qm->pdev->dev; u32 reg_val, type, vf_num; + int i; - while (err->msg) { - if (err->int_msk & error_status) { - dev_err(dev, "%s [error status=0x%x] found\n", - err->msg, err->int_msk); + for (i = 0; i < ARRAY_SIZE(qm_hw_error); i++) { + err = &qm_hw_error[i]; + if (!(err->int_msk & error_status)) + continue; - if (error_status & QM_DB_TIMEOUT) { - reg_val = readl(qm->io_base + - QM_ABNORMAL_INF01); - type = (reg_val & QM_DB_TIMEOUT_TYPE) >> - QM_DB_TIMEOUT_TYPE_SHIFT; - vf_num = reg_val & QM_DB_TIMEOUT_VF; - dev_err(dev, "qm %s doorbell timeout in function %u\n", - qm_db_timeout[type], vf_num); - } + dev_err(dev, "%s [error status=0x%x] found\n", + err->msg, err->int_msk); - if (error_status & QM_OF_FIFO_OF) { - reg_val = readl(qm->io_base + - QM_ABNORMAL_INF00); - type = (reg_val & QM_FIFO_OVERFLOW_TYPE) >> - QM_FIFO_OVERFLOW_TYPE_SHIFT; - vf_num = reg_val & QM_FIFO_OVERFLOW_VF; + if (err->int_msk & QM_DB_TIMEOUT) { + reg_val = readl(qm->io_base + QM_ABNORMAL_INF01); + type = (reg_val & QM_DB_TIMEOUT_TYPE) >> + QM_DB_TIMEOUT_TYPE_SHIFT; + vf_num = reg_val & QM_DB_TIMEOUT_VF; + dev_err(dev, "qm %s doorbell timeout in function %u\n", + qm_db_timeout[type], vf_num); + } else if (err->int_msk & QM_OF_FIFO_OF) { + reg_val = readl(qm->io_base + QM_ABNORMAL_INF00); + type = (reg_val & QM_FIFO_OVERFLOW_TYPE) >> + QM_FIFO_OVERFLOW_TYPE_SHIFT; + vf_num = reg_val & QM_FIFO_OVERFLOW_VF; - if (type < ARRAY_SIZE(qm_fifo_overflow)) - dev_err(dev, "qm %s fifo overflow in function %u\n", - qm_fifo_overflow[type], - vf_num); - else - dev_err(dev, "unknown error type\n"); - } + if (type < ARRAY_SIZE(qm_fifo_overflow)) + dev_err(dev, "qm %s fifo overflow in function %u\n", + qm_fifo_overflow[type], vf_num); + else + dev_err(dev, "unknown error type\n"); } - err++; } } From eed74b3eba9eda36d155c11a12b2b4b50c67c1d8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 20 Jan 2020 17:38:04 +0300 Subject: [PATCH 005/162] crypto: rng - Fix a refcounting bug in crypto_rng_reset() We need to decrement this refcounter on these error paths. Fixes: f7d76e05d058 ("crypto: user - fix use_after_free of struct xxx_request") Cc: Signed-off-by: Dan Carpenter Acked-by: Neil Horman Signed-off-by: Herbert Xu --- crypto/rng.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crypto/rng.c b/crypto/rng.c index 1e21231f71c9..1490d210f1a1 100644 --- a/crypto/rng.c +++ b/crypto/rng.c @@ -37,12 +37,16 @@ int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen) crypto_stats_get(alg); if (!seed && slen) { buf = kmalloc(slen, GFP_KERNEL); - if (!buf) + if (!buf) { + crypto_alg_put(alg); return -ENOMEM; + } err = get_random_bytes_wait(buf, slen); - if (err) + if (err) { + crypto_alg_put(alg); goto out; + } seed = buf; } From 07b586fe06625b0b610dc3d3a969c51913d143d4 Mon Sep 17 00:00:00 2001 From: "Jason A. 
Donenfeld" Date: Mon, 20 Jan 2020 18:18:15 +0100 Subject: [PATCH 006/162] crypto: x86/curve25519 - replace with formally verified implementation This comes from INRIA's HACL*/Vale. It implements the same algorithm and implementation strategy as the code it replaces, only this code has been formally verified, sans the base point multiplication, which uses code similar to prior, only it uses the formally verified field arithmetic alongside reproducable ladder generation steps. This doesn't have a pure-bmi2 version, which means haswell no longer benefits, but the increased (doubled) code complexity is not worth it for a single generation of chips that's already old. Performance-wise, this is around 1% slower on older microarchitectures, and slightly faster on newer microarchitectures, mainly 10nm ones or backports of 10nm to 14nm. This implementation is "everest" below: Xeon E5-2680 v4 (Broadwell) armfazh: 133340 cycles per call everest: 133436 cycles per call Xeon Gold 5120 (Sky Lake Server) armfazh: 112636 cycles per call everest: 113906 cycles per call Core i5-6300U (Sky Lake Client) armfazh: 116810 cycles per call everest: 117916 cycles per call Core i7-7600U (Kaby Lake) armfazh: 119523 cycles per call everest: 119040 cycles per call Core i7-8750H (Coffee Lake) armfazh: 113914 cycles per call everest: 113650 cycles per call Core i9-9880H (Coffee Lake Refresh) armfazh: 112616 cycles per call everest: 114082 cycles per call Core i3-8121U (Cannon Lake) armfazh: 113202 cycles per call everest: 111382 cycles per call Core i7-8265U (Whiskey Lake) armfazh: 127307 cycles per call everest: 127697 cycles per call Core i7-8550U (Kaby Lake Refresh) armfazh: 127522 cycles per call everest: 127083 cycles per call Xeon Platinum 8275CL (Cascade Lake) armfazh: 114380 cycles per call everest: 114656 cycles per call Achieving these kind of results with formally verified code is quite remarkable, especialy considering that performance is favorable for newer chips. Signed-off-by: Jason A. Donenfeld Signed-off-by: Herbert Xu --- arch/x86/crypto/curve25519-x86_64.c | 3652 ++++++++++----------------- 1 file changed, 1345 insertions(+), 2307 deletions(-) diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c index eec7d2d24239..e4e58b8e9afe 100644 --- a/arch/x86/crypto/curve25519-x86_64.c +++ b/arch/x86/crypto/curve25519-x86_64.c @@ -1,8 +1,7 @@ -// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// SPDX-License-Identifier: GPL-2.0 OR MIT /* - * Copyright (c) 2017 Armando Faz . All Rights Reserved. - * Copyright (C) 2018-2019 Jason A. Donenfeld . All Rights Reserved. - * Copyright (C) 2018 Samuel Neves . All Rights Reserved. + * Copyright (C) 2020 Jason A. Donenfeld . All Rights Reserved. 
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation */ #include @@ -16,2337 +15,1378 @@ #include #include -static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_adx); +static __always_inline u64 eq_mask(u64 a, u64 b) +{ + u64 x = a ^ b; + u64 minus_x = ~x + (u64)1U; + u64 x_or_minus_x = x | minus_x; + u64 xnx = x_or_minus_x >> (u32)63U; + return xnx - (u64)1U; +} -enum { NUM_WORDS_ELTFP25519 = 4 }; -typedef __aligned(32) u64 eltfp25519_1w[NUM_WORDS_ELTFP25519]; -typedef __aligned(32) u64 eltfp25519_1w_buffer[2 * NUM_WORDS_ELTFP25519]; +static __always_inline u64 gte_mask(u64 a, u64 b) +{ + u64 x = a; + u64 y = b; + u64 x_xor_y = x ^ y; + u64 x_sub_y = x - y; + u64 x_sub_y_xor_y = x_sub_y ^ y; + u64 q = x_xor_y | x_sub_y_xor_y; + u64 x_xor_q = x ^ q; + u64 x_xor_q_ = x_xor_q >> (u32)63U; + return x_xor_q_ - (u64)1U; +} -#define mul_eltfp25519_1w_adx(c, a, b) do { \ - mul_256x256_integer_adx(m.buffer, a, b); \ - red_eltfp25519_1w_adx(c, m.buffer); \ -} while (0) +/* Computes the addition of four-element f1 with value in f2 + * and returns the carry (if any) */ +static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2) +{ + u64 carry_r; -#define mul_eltfp25519_1w_bmi2(c, a, b) do { \ - mul_256x256_integer_bmi2(m.buffer, a, b); \ - red_eltfp25519_1w_bmi2(c, m.buffer); \ -} while (0) + asm volatile( + /* Clear registers to propagate the carry bit */ + " xor %%r8, %%r8;" + " xor %%r9, %%r9;" + " xor %%r10, %%r10;" + " xor %%r11, %%r11;" + " xor %1, %1;" -#define sqr_eltfp25519_1w_adx(a) do { \ - sqr_256x256_integer_adx(m.buffer, a); \ - red_eltfp25519_1w_adx(a, m.buffer); \ -} while (0) + /* Begin addition chain */ + " addq 0(%3), %0;" + " movq %0, 0(%2);" + " adcxq 8(%3), %%r8;" + " movq %%r8, 8(%2);" + " adcxq 16(%3), %%r9;" + " movq %%r9, 16(%2);" + " adcxq 24(%3), %%r10;" + " movq %%r10, 24(%2);" -#define sqr_eltfp25519_1w_bmi2(a) do { \ - sqr_256x256_integer_bmi2(m.buffer, a); \ - red_eltfp25519_1w_bmi2(a, m.buffer); \ -} while (0) + /* Return the carry bit in a register */ + " adcx %%r11, %1;" + : "+&r" (f2), "=&r" (carry_r) + : "r" (out), "r" (f1) + : "%r8", "%r9", "%r10", "%r11", "memory", "cc" + ); -#define mul_eltfp25519_2w_adx(c, a, b) do { \ - mul2_256x256_integer_adx(m.buffer, a, b); \ - red_eltfp25519_2w_adx(c, m.buffer); \ -} while (0) + return carry_r; +} -#define mul_eltfp25519_2w_bmi2(c, a, b) do { \ - mul2_256x256_integer_bmi2(m.buffer, a, b); \ - red_eltfp25519_2w_bmi2(c, m.buffer); \ -} while (0) +/* Computes the field addition of two field elements */ +static inline void fadd(u64 *out, const u64 *f1, const u64 *f2) +{ + asm volatile( + /* Compute the raw addition of f1 + f2 */ + " movq 0(%0), %%r8;" + " addq 0(%2), %%r8;" + " movq 8(%0), %%r9;" + " adcxq 8(%2), %%r9;" + " movq 16(%0), %%r10;" + " adcxq 16(%2), %%r10;" + " movq 24(%0), %%r11;" + " adcxq 24(%2), %%r11;" -#define sqr_eltfp25519_2w_adx(a) do { \ - sqr2_256x256_integer_adx(m.buffer, a); \ - red_eltfp25519_2w_adx(a, m.buffer); \ -} while (0) + /* Wrap the result back into the field */ -#define sqr_eltfp25519_2w_bmi2(a) do { \ - sqr2_256x256_integer_bmi2(m.buffer, a); \ - red_eltfp25519_2w_bmi2(a, m.buffer); \ -} while (0) + /* Step 1: Compute carry*38 */ + " mov $0, %%rax;" + " mov $38, %0;" + " cmovc %0, %%rax;" -#define sqrn_eltfp25519_1w_adx(a, times) do { \ - int ____counter = (times); \ - while (____counter-- > 0) \ - sqr_eltfp25519_1w_adx(a); \ -} while (0) + /* Step 2: Add carry*38 to the original sum */ + " xor 
%%rcx, %%rcx;" + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" -#define sqrn_eltfp25519_1w_bmi2(a, times) do { \ - int ____counter = (times); \ - while (____counter-- > 0) \ - sqr_eltfp25519_1w_bmi2(a); \ -} while (0) + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %0, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r" (f2) + : "r" (out), "r" (f1) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc" + ); +} -#define copy_eltfp25519_1w(C, A) do { \ - (C)[0] = (A)[0]; \ - (C)[1] = (A)[1]; \ - (C)[2] = (A)[2]; \ - (C)[3] = (A)[3]; \ -} while (0) +/* Computes the field substraction of two field elements */ +static inline void fsub(u64 *out, const u64 *f1, const u64 *f2) +{ + asm volatile( + /* Compute the raw substraction of f1-f2 */ + " movq 0(%1), %%r8;" + " subq 0(%2), %%r8;" + " movq 8(%1), %%r9;" + " sbbq 8(%2), %%r9;" + " movq 16(%1), %%r10;" + " sbbq 16(%2), %%r10;" + " movq 24(%1), %%r11;" + " sbbq 24(%2), %%r11;" -#define setzero_eltfp25519_1w(C) do { \ - (C)[0] = 0; \ - (C)[1] = 0; \ - (C)[2] = 0; \ - (C)[3] = 0; \ -} while (0) + /* Wrap the result back into the field */ -__aligned(32) static const u64 table_ladder_8k[252 * NUM_WORDS_ELTFP25519] = { - /* 1 */ 0xfffffffffffffff3UL, 0xffffffffffffffffUL, - 0xffffffffffffffffUL, 0x5fffffffffffffffUL, - /* 2 */ 0x6b8220f416aafe96UL, 0x82ebeb2b4f566a34UL, - 0xd5a9a5b075a5950fUL, 0x5142b2cf4b2488f4UL, - /* 3 */ 0x6aaebc750069680cUL, 0x89cf7820a0f99c41UL, - 0x2a58d9183b56d0f4UL, 0x4b5aca80e36011a4UL, - /* 4 */ 0x329132348c29745dUL, 0xf4a2e616e1642fd7UL, - 0x1e45bb03ff67bc34UL, 0x306912d0f42a9b4aUL, - /* 5 */ 0xff886507e6af7154UL, 0x04f50e13dfeec82fUL, - 0xaa512fe82abab5ceUL, 0x174e251a68d5f222UL, - /* 6 */ 0xcf96700d82028898UL, 0x1743e3370a2c02c5UL, - 0x379eec98b4e86eaaUL, 0x0c59888a51e0482eUL, - /* 7 */ 0xfbcbf1d699b5d189UL, 0xacaef0d58e9fdc84UL, - 0xc1c20d06231f7614UL, 0x2938218da274f972UL, - /* 8 */ 0xf6af49beff1d7f18UL, 0xcc541c22387ac9c2UL, - 0x96fcc9ef4015c56bUL, 0x69c1627c690913a9UL, - /* 9 */ 0x7a86fd2f4733db0eUL, 0xfdb8c4f29e087de9UL, - 0x095e4b1a8ea2a229UL, 0x1ad7a7c829b37a79UL, - /* 10 */ 0x342d89cad17ea0c0UL, 0x67bedda6cced2051UL, - 0x19ca31bf2bb42f74UL, 0x3df7b4c84980acbbUL, - /* 11 */ 0xa8c6444dc80ad883UL, 0xb91e440366e3ab85UL, - 0xc215cda00164f6d8UL, 0x3d867c6ef247e668UL, - /* 12 */ 0xc7dd582bcc3e658cUL, 0xfd2c4748ee0e5528UL, - 0xa0fd9b95cc9f4f71UL, 0x7529d871b0675ddfUL, - /* 13 */ 0xb8f568b42d3cbd78UL, 0x1233011b91f3da82UL, - 0x2dce6ccd4a7c3b62UL, 0x75e7fc8e9e498603UL, - /* 14 */ 0x2f4f13f1fcd0b6ecUL, 0xf1a8ca1f29ff7a45UL, - 0xc249c1a72981e29bUL, 0x6ebe0dbb8c83b56aUL, - /* 15 */ 0x7114fa8d170bb222UL, 0x65a2dcd5bf93935fUL, - 0xbdc41f68b59c979aUL, 0x2f0eef79a2ce9289UL, - /* 16 */ 0x42ecbf0c083c37ceUL, 0x2930bc09ec496322UL, - 0xf294b0c19cfeac0dUL, 0x3780aa4bedfabb80UL, - /* 17 */ 0x56c17d3e7cead929UL, 0xe7cb4beb2e5722c5UL, - 0x0ce931732dbfe15aUL, 0x41b883c7621052f8UL, - /* 18 */ 0xdbf75ca0c3d25350UL, 0x2936be086eb1e351UL, - 0xc936e03cb4a9b212UL, 0x1d45bf82322225aaUL, - /* 19 */ 0xe81ab1036a024cc5UL, 0xe212201c304c9a72UL, - 0xc5d73fba6832b1fcUL, 0x20ffdb5a4d839581UL, - /* 20 */ 0xa283d367be5d0fadUL, 0x6c2b25ca8b164475UL, - 0x9d4935467caaf22eUL, 0x5166408eee85ff49UL, - /* 21 */ 0x3c67baa2fab4e361UL, 0xb3e433c67ef35cefUL, - 0x5259729241159b1cUL, 0x6a621892d5b0ab33UL, - /* 22 */ 0x20b74a387555cdcbUL, 0x532aa10e1208923fUL, - 
0xeaa17b7762281dd1UL, 0x61ab3443f05c44bfUL, - /* 23 */ 0x257a6c422324def8UL, 0x131c6c1017e3cf7fUL, - 0x23758739f630a257UL, 0x295a407a01a78580UL, - /* 24 */ 0xf8c443246d5da8d9UL, 0x19d775450c52fa5dUL, - 0x2afcfc92731bf83dUL, 0x7d10c8e81b2b4700UL, - /* 25 */ 0xc8e0271f70baa20bUL, 0x993748867ca63957UL, - 0x5412efb3cb7ed4bbUL, 0x3196d36173e62975UL, - /* 26 */ 0xde5bcad141c7dffcUL, 0x47cc8cd2b395c848UL, - 0xa34cd942e11af3cbUL, 0x0256dbf2d04ecec2UL, - /* 27 */ 0x875ab7e94b0e667fUL, 0xcad4dd83c0850d10UL, - 0x47f12e8f4e72c79fUL, 0x5f1a87bb8c85b19bUL, - /* 28 */ 0x7ae9d0b6437f51b8UL, 0x12c7ce5518879065UL, - 0x2ade09fe5cf77aeeUL, 0x23a05a2f7d2c5627UL, - /* 29 */ 0x5908e128f17c169aUL, 0xf77498dd8ad0852dUL, - 0x74b4c4ceab102f64UL, 0x183abadd10139845UL, - /* 30 */ 0xb165ba8daa92aaacUL, 0xd5c5ef9599386705UL, - 0xbe2f8f0cf8fc40d1UL, 0x2701e635ee204514UL, - /* 31 */ 0x629fa80020156514UL, 0xf223868764a8c1ceUL, - 0x5b894fff0b3f060eUL, 0x60d9944cf708a3faUL, - /* 32 */ 0xaeea001a1c7a201fUL, 0xebf16a633ee2ce63UL, - 0x6f7709594c7a07e1UL, 0x79b958150d0208cbUL, - /* 33 */ 0x24b55e5301d410e7UL, 0xe3a34edff3fdc84dUL, - 0xd88768e4904032d8UL, 0x131384427b3aaeecUL, - /* 34 */ 0x8405e51286234f14UL, 0x14dc4739adb4c529UL, - 0xb8a2b5b250634ffdUL, 0x2fe2a94ad8a7ff93UL, - /* 35 */ 0xec5c57efe843faddUL, 0x2843ce40f0bb9918UL, - 0xa4b561d6cf3d6305UL, 0x743629bde8fb777eUL, - /* 36 */ 0x343edd46bbaf738fUL, 0xed981828b101a651UL, - 0xa401760b882c797aUL, 0x1fc223e28dc88730UL, - /* 37 */ 0x48604e91fc0fba0eUL, 0xb637f78f052c6fa4UL, - 0x91ccac3d09e9239cUL, 0x23f7eed4437a687cUL, - /* 38 */ 0x5173b1118d9bd800UL, 0x29d641b63189d4a7UL, - 0xfdbf177988bbc586UL, 0x2959894fcad81df5UL, - /* 39 */ 0xaebc8ef3b4bbc899UL, 0x4148995ab26992b9UL, - 0x24e20b0134f92cfbUL, 0x40d158894a05dee8UL, - /* 40 */ 0x46b00b1185af76f6UL, 0x26bac77873187a79UL, - 0x3dc0bf95ab8fff5fUL, 0x2a608bd8945524d7UL, - /* 41 */ 0x26449588bd446302UL, 0x7c4bc21c0388439cUL, - 0x8e98a4f383bd11b2UL, 0x26218d7bc9d876b9UL, - /* 42 */ 0xe3081542997c178aUL, 0x3c2d29a86fb6606fUL, - 0x5c217736fa279374UL, 0x7dde05734afeb1faUL, - /* 43 */ 0x3bf10e3906d42babUL, 0xe4f7803e1980649cUL, - 0xe6053bf89595bf7aUL, 0x394faf38da245530UL, - /* 44 */ 0x7a8efb58896928f4UL, 0xfbc778e9cc6a113cUL, - 0x72670ce330af596fUL, 0x48f222a81d3d6cf7UL, - /* 45 */ 0xf01fce410d72caa7UL, 0x5a20ecc7213b5595UL, - 0x7bc21165c1fa1483UL, 0x07f89ae31da8a741UL, - /* 46 */ 0x05d2c2b4c6830ff9UL, 0xd43e330fc6316293UL, - 0xa5a5590a96d3a904UL, 0x705edb91a65333b6UL, - /* 47 */ 0x048ee15e0bb9a5f7UL, 0x3240cfca9e0aaf5dUL, - 0x8f4b71ceedc4a40bUL, 0x621c0da3de544a6dUL, - /* 48 */ 0x92872836a08c4091UL, 0xce8375b010c91445UL, - 0x8a72eb524f276394UL, 0x2667fcfa7ec83635UL, - /* 49 */ 0x7f4c173345e8752aUL, 0x061b47feee7079a5UL, - 0x25dd9afa9f86ff34UL, 0x3780cef5425dc89cUL, - /* 50 */ 0x1a46035a513bb4e9UL, 0x3e1ef379ac575adaUL, - 0xc78c5f1c5fa24b50UL, 0x321a967634fd9f22UL, - /* 51 */ 0x946707b8826e27faUL, 0x3dca84d64c506fd0UL, - 0xc189218075e91436UL, 0x6d9284169b3b8484UL, - /* 52 */ 0x3a67e840383f2ddfUL, 0x33eec9a30c4f9b75UL, - 0x3ec7c86fa783ef47UL, 0x26ec449fbac9fbc4UL, - /* 53 */ 0x5c0f38cba09b9e7dUL, 0x81168cc762a3478cUL, - 0x3e23b0d306fc121cUL, 0x5a238aa0a5efdcddUL, - /* 54 */ 0x1ba26121c4ea43ffUL, 0x36f8c77f7c8832b5UL, - 0x88fbea0b0adcf99aUL, 0x5ca9938ec25bebf9UL, - /* 55 */ 0xd5436a5e51fccda0UL, 0x1dbc4797c2cd893bUL, - 0x19346a65d3224a08UL, 0x0f5034e49b9af466UL, - /* 56 */ 0xf23c3967a1e0b96eUL, 0xe58b08fa867a4d88UL, - 0xfb2fabc6a7341679UL, 0x2a75381eb6026946UL, - /* 57 */ 0xc80a3be4c19420acUL, 0x66b1f6c681f2b6dcUL, - 
0x7cf7036761e93388UL, 0x25abbbd8a660a4c4UL, - /* 58 */ 0x91ea12ba14fd5198UL, 0x684950fc4a3cffa9UL, - 0xf826842130f5ad28UL, 0x3ea988f75301a441UL, - /* 59 */ 0xc978109a695f8c6fUL, 0x1746eb4a0530c3f3UL, - 0x444d6d77b4459995UL, 0x75952b8c054e5cc7UL, - /* 60 */ 0xa3703f7915f4d6aaUL, 0x66c346202f2647d8UL, - 0xd01469df811d644bUL, 0x77fea47d81a5d71fUL, - /* 61 */ 0xc5e9529ef57ca381UL, 0x6eeeb4b9ce2f881aUL, - 0xb6e91a28e8009bd6UL, 0x4b80be3e9afc3fecUL, - /* 62 */ 0x7e3773c526aed2c5UL, 0x1b4afcb453c9a49dUL, - 0xa920bdd7baffb24dUL, 0x7c54699f122d400eUL, - /* 63 */ 0xef46c8e14fa94bc8UL, 0xe0b074ce2952ed5eUL, - 0xbea450e1dbd885d5UL, 0x61b68649320f712cUL, - /* 64 */ 0x8a485f7309ccbdd1UL, 0xbd06320d7d4d1a2dUL, - 0x25232973322dbef4UL, 0x445dc4758c17f770UL, - /* 65 */ 0xdb0434177cc8933cUL, 0xed6fe82175ea059fUL, - 0x1efebefdc053db34UL, 0x4adbe867c65daf99UL, - /* 66 */ 0x3acd71a2a90609dfUL, 0xe5e991856dd04050UL, - 0x1ec69b688157c23cUL, 0x697427f6885cfe4dUL, - /* 67 */ 0xd7be7b9b65e1a851UL, 0xa03d28d522c536ddUL, - 0x28399d658fd2b645UL, 0x49e5b7e17c2641e1UL, - /* 68 */ 0x6f8c3a98700457a4UL, 0x5078f0a25ebb6778UL, - 0xd13c3ccbc382960fUL, 0x2e003258a7df84b1UL, - /* 69 */ 0x8ad1f39be6296a1cUL, 0xc1eeaa652a5fbfb2UL, - 0x33ee0673fd26f3cbUL, 0x59256173a69d2cccUL, - /* 70 */ 0x41ea07aa4e18fc41UL, 0xd9fc19527c87a51eUL, - 0xbdaacb805831ca6fUL, 0x445b652dc916694fUL, - /* 71 */ 0xce92a3a7f2172315UL, 0x1edc282de11b9964UL, - 0xa1823aafe04c314aUL, 0x790a2d94437cf586UL, - /* 72 */ 0x71c447fb93f6e009UL, 0x8922a56722845276UL, - 0xbf70903b204f5169UL, 0x2f7a89891ba319feUL, - /* 73 */ 0x02a08eb577e2140cUL, 0xed9a4ed4427bdcf4UL, - 0x5253ec44e4323cd1UL, 0x3e88363c14e9355bUL, - /* 74 */ 0xaa66c14277110b8cUL, 0x1ae0391610a23390UL, - 0x2030bd12c93fc2a2UL, 0x3ee141579555c7abUL, - /* 75 */ 0x9214de3a6d6e7d41UL, 0x3ccdd88607f17efeUL, - 0x674f1288f8e11217UL, 0x5682250f329f93d0UL, - /* 76 */ 0x6cf00b136d2e396eUL, 0x6e4cf86f1014debfUL, - 0x5930b1b5bfcc4e83UL, 0x047069b48aba16b6UL, - /* 77 */ 0x0d4ce4ab69b20793UL, 0xb24db91a97d0fb9eUL, - 0xcdfa50f54e00d01dUL, 0x221b1085368bddb5UL, - /* 78 */ 0xe7e59468b1e3d8d2UL, 0x53c56563bd122f93UL, - 0xeee8a903e0663f09UL, 0x61efa662cbbe3d42UL, - /* 79 */ 0x2cf8ddddde6eab2aUL, 0x9bf80ad51435f231UL, - 0x5deadacec9f04973UL, 0x29275b5d41d29b27UL, - /* 80 */ 0xcfde0f0895ebf14fUL, 0xb9aab96b054905a7UL, - 0xcae80dd9a1c420fdUL, 0x0a63bf2f1673bbc7UL, - /* 81 */ 0x092f6e11958fbc8cUL, 0x672a81e804822fadUL, - 0xcac8351560d52517UL, 0x6f3f7722c8f192f8UL, - /* 82 */ 0xf8ba90ccc2e894b7UL, 0x2c7557a438ff9f0dUL, - 0x894d1d855ae52359UL, 0x68e122157b743d69UL, - /* 83 */ 0xd87e5570cfb919f3UL, 0x3f2cdecd95798db9UL, - 0x2121154710c0a2ceUL, 0x3c66a115246dc5b2UL, - /* 84 */ 0xcbedc562294ecb72UL, 0xba7143c36a280b16UL, - 0x9610c2efd4078b67UL, 0x6144735d946a4b1eUL, - /* 85 */ 0x536f111ed75b3350UL, 0x0211db8c2041d81bUL, - 0xf93cb1000e10413cUL, 0x149dfd3c039e8876UL, - /* 86 */ 0xd479dde46b63155bUL, 0xb66e15e93c837976UL, - 0xdafde43b1f13e038UL, 0x5fafda1a2e4b0b35UL, - /* 87 */ 0x3600bbdf17197581UL, 0x3972050bbe3cd2c2UL, - 0x5938906dbdd5be86UL, 0x34fce5e43f9b860fUL, - /* 88 */ 0x75a8a4cd42d14d02UL, 0x828dabc53441df65UL, - 0x33dcabedd2e131d3UL, 0x3ebad76fb814d25fUL, - /* 89 */ 0xd4906f566f70e10fUL, 0x5d12f7aa51690f5aUL, - 0x45adb16e76cefcf2UL, 0x01f768aead232999UL, - /* 90 */ 0x2b6cc77b6248febdUL, 0x3cd30628ec3aaffdUL, - 0xce1c0b80d4ef486aUL, 0x4c3bff2ea6f66c23UL, - /* 91 */ 0x3f2ec4094aeaeb5fUL, 0x61b19b286e372ca7UL, - 0x5eefa966de2a701dUL, 0x23b20565de55e3efUL, - /* 92 */ 0xe301ca5279d58557UL, 0x07b2d4ce27c2874fUL, - 
0xa532cd8a9dcf1d67UL, 0x2a52fee23f2bff56UL, - /* 93 */ 0x8624efb37cd8663dUL, 0xbbc7ac20ffbd7594UL, - 0x57b85e9c82d37445UL, 0x7b3052cb86a6ec66UL, - /* 94 */ 0x3482f0ad2525e91eUL, 0x2cb68043d28edca0UL, - 0xaf4f6d052e1b003aUL, 0x185f8c2529781b0aUL, - /* 95 */ 0xaa41de5bd80ce0d6UL, 0x9407b2416853e9d6UL, - 0x563ec36e357f4c3aUL, 0x4cc4b8dd0e297bceUL, - /* 96 */ 0xa2fc1a52ffb8730eUL, 0x1811f16e67058e37UL, - 0x10f9a366cddf4ee1UL, 0x72f4a0c4a0b9f099UL, - /* 97 */ 0x8c16c06f663f4ea7UL, 0x693b3af74e970fbaUL, - 0x2102e7f1d69ec345UL, 0x0ba53cbc968a8089UL, - /* 98 */ 0xca3d9dc7fea15537UL, 0x4c6824bb51536493UL, - 0xb9886314844006b1UL, 0x40d2a72ab454cc60UL, - /* 99 */ 0x5936a1b712570975UL, 0x91b9d648debda657UL, - 0x3344094bb64330eaUL, 0x006ba10d12ee51d0UL, - /* 100 */ 0x19228468f5de5d58UL, 0x0eb12f4c38cc05b0UL, - 0xa1039f9dd5601990UL, 0x4502d4ce4fff0e0bUL, - /* 101 */ 0xeb2054106837c189UL, 0xd0f6544c6dd3b93cUL, - 0x40727064c416d74fUL, 0x6e15c6114b502ef0UL, - /* 102 */ 0x4df2a398cfb1a76bUL, 0x11256c7419f2f6b1UL, - 0x4a497962066e6043UL, 0x705b3aab41355b44UL, - /* 103 */ 0x365ef536d797b1d8UL, 0x00076bd622ddf0dbUL, - 0x3bbf33b0e0575a88UL, 0x3777aa05c8e4ca4dUL, - /* 104 */ 0x392745c85578db5fUL, 0x6fda4149dbae5ae2UL, - 0xb1f0b00b8adc9867UL, 0x09963437d36f1da3UL, - /* 105 */ 0x7e824e90a5dc3853UL, 0xccb5f6641f135cbdUL, - 0x6736d86c87ce8fccUL, 0x625f3ce26604249fUL, - /* 106 */ 0xaf8ac8059502f63fUL, 0x0c05e70a2e351469UL, - 0x35292e9c764b6305UL, 0x1a394360c7e23ac3UL, - /* 107 */ 0xd5c6d53251183264UL, 0x62065abd43c2b74fUL, - 0xb5fbf5d03b973f9bUL, 0x13a3da3661206e5eUL, - /* 108 */ 0xc6bd5837725d94e5UL, 0x18e30912205016c5UL, - 0x2088ce1570033c68UL, 0x7fba1f495c837987UL, - /* 109 */ 0x5a8c7423f2f9079dUL, 0x1735157b34023fc5UL, - 0xe4f9b49ad2fab351UL, 0x6691ff72c878e33cUL, - /* 110 */ 0x122c2adedc5eff3eUL, 0xf8dd4bf1d8956cf4UL, - 0xeb86205d9e9e5bdaUL, 0x049b92b9d975c743UL, - /* 111 */ 0xa5379730b0f6c05aUL, 0x72a0ffacc6f3a553UL, - 0xb0032c34b20dcd6dUL, 0x470e9dbc88d5164aUL, - /* 112 */ 0xb19cf10ca237c047UL, 0xb65466711f6c81a2UL, - 0xb3321bd16dd80b43UL, 0x48c14f600c5fbe8eUL, - /* 113 */ 0x66451c264aa6c803UL, 0xb66e3904a4fa7da6UL, - 0xd45f19b0b3128395UL, 0x31602627c3c9bc10UL, - /* 114 */ 0x3120dc4832e4e10dUL, 0xeb20c46756c717f7UL, - 0x00f52e3f67280294UL, 0x566d4fc14730c509UL, - /* 115 */ 0x7e3a5d40fd837206UL, 0xc1e926dc7159547aUL, - 0x216730fba68d6095UL, 0x22e8c3843f69cea7UL, - /* 116 */ 0x33d074e8930e4b2bUL, 0xb6e4350e84d15816UL, - 0x5534c26ad6ba2365UL, 0x7773c12f89f1f3f3UL, - /* 117 */ 0x8cba404da57962aaUL, 0x5b9897a81999ce56UL, - 0x508e862f121692fcUL, 0x3a81907fa093c291UL, - /* 118 */ 0x0dded0ff4725a510UL, 0x10d8cc10673fc503UL, - 0x5b9d151c9f1f4e89UL, 0x32a5c1d5cb09a44cUL, - /* 119 */ 0x1e0aa442b90541fbUL, 0x5f85eb7cc1b485dbUL, - 0xbee595ce8a9df2e5UL, 0x25e496c722422236UL, - /* 120 */ 0x5edf3c46cd0fe5b9UL, 0x34e75a7ed2a43388UL, - 0xe488de11d761e352UL, 0x0e878a01a085545cUL, - /* 121 */ 0xba493c77e021bb04UL, 0x2b4d1843c7df899aUL, - 0x9ea37a487ae80d67UL, 0x67a9958011e41794UL, - /* 122 */ 0x4b58051a6697b065UL, 0x47e33f7d8d6ba6d4UL, - 0xbb4da8d483ca46c1UL, 0x68becaa181c2db0dUL, - /* 123 */ 0x8d8980e90b989aa5UL, 0xf95eb14a2c93c99bUL, - 0x51c6c7c4796e73a2UL, 0x6e228363b5efb569UL, - /* 124 */ 0xc6bbc0b02dd624c8UL, 0x777eb47dec8170eeUL, - 0x3cde15a004cfafa9UL, 0x1dc6bc087160bf9bUL, - /* 125 */ 0x2e07e043eec34002UL, 0x18e9fc677a68dc7fUL, - 0xd8da03188bd15b9aUL, 0x48fbc3bb00568253UL, - /* 126 */ 0x57547d4cfb654ce1UL, 0xd3565b82a058e2adUL, - 0xf63eaf0bbf154478UL, 0x47531ef114dfbb18UL, - /* 127 */ 0xe1ec630a4278c587UL, 
0x5507d546ca8e83f3UL, - 0x85e135c63adc0c2bUL, 0x0aa7efa85682844eUL, - /* 128 */ 0x72691ba8b3e1f615UL, 0x32b4e9701fbe3ffaUL, - 0x97b6d92e39bb7868UL, 0x2cfe53dea02e39e8UL, - /* 129 */ 0x687392cd85cd52b0UL, 0x27ff66c910e29831UL, - 0x97134556a9832d06UL, 0x269bb0360a84f8a0UL, - /* 130 */ 0x706e55457643f85cUL, 0x3734a48c9b597d1bUL, - 0x7aee91e8c6efa472UL, 0x5cd6abc198a9d9e0UL, - /* 131 */ 0x0e04de06cb3ce41aUL, 0xd8c6eb893402e138UL, - 0x904659bb686e3772UL, 0x7215c371746ba8c8UL, - /* 132 */ 0xfd12a97eeae4a2d9UL, 0x9514b7516394f2c5UL, - 0x266fd5809208f294UL, 0x5c847085619a26b9UL, - /* 133 */ 0x52985410fed694eaUL, 0x3c905b934a2ed254UL, - 0x10bb47692d3be467UL, 0x063b3d2d69e5e9e1UL, - /* 134 */ 0x472726eedda57debUL, 0xefb6c4ae10f41891UL, - 0x2b1641917b307614UL, 0x117c554fc4f45b7cUL, - /* 135 */ 0xc07cf3118f9d8812UL, 0x01dbd82050017939UL, - 0xd7e803f4171b2827UL, 0x1015e87487d225eaUL, - /* 136 */ 0xc58de3fed23acc4dUL, 0x50db91c294a7be2dUL, - 0x0b94d43d1c9cf457UL, 0x6b1640fa6e37524aUL, - /* 137 */ 0x692f346c5fda0d09UL, 0x200b1c59fa4d3151UL, - 0xb8c46f760777a296UL, 0x4b38395f3ffdfbcfUL, - /* 138 */ 0x18d25e00be54d671UL, 0x60d50582bec8aba6UL, - 0x87ad8f263b78b982UL, 0x50fdf64e9cda0432UL, - /* 139 */ 0x90f567aac578dcf0UL, 0xef1e9b0ef2a3133bUL, - 0x0eebba9242d9de71UL, 0x15473c9bf03101c7UL, - /* 140 */ 0x7c77e8ae56b78095UL, 0xb678e7666e6f078eUL, - 0x2da0b9615348ba1fUL, 0x7cf931c1ff733f0bUL, - /* 141 */ 0x26b357f50a0a366cUL, 0xe9708cf42b87d732UL, - 0xc13aeea5f91cb2c0UL, 0x35d90c991143bb4cUL, - /* 142 */ 0x47c1c404a9a0d9dcUL, 0x659e58451972d251UL, - 0x3875a8c473b38c31UL, 0x1fbd9ed379561f24UL, - /* 143 */ 0x11fabc6fd41ec28dUL, 0x7ef8dfe3cd2a2dcaUL, - 0x72e73b5d8c404595UL, 0x6135fa4954b72f27UL, - /* 144 */ 0xccfc32a2de24b69cUL, 0x3f55698c1f095d88UL, - 0xbe3350ed5ac3f929UL, 0x5e9bf806ca477eebUL, - /* 145 */ 0xe9ce8fb63c309f68UL, 0x5376f63565e1f9f4UL, - 0xd1afcfb35a6393f1UL, 0x6632a1ede5623506UL, - /* 146 */ 0x0b7d6c390c2ded4cUL, 0x56cb3281df04cb1fUL, - 0x66305a1249ecc3c7UL, 0x5d588b60a38ca72aUL, - /* 147 */ 0xa6ecbf78e8e5f42dUL, 0x86eeb44b3c8a3eecUL, - 0xec219c48fbd21604UL, 0x1aaf1af517c36731UL, - /* 148 */ 0xc306a2836769bde7UL, 0x208280622b1e2adbUL, - 0x8027f51ffbff94a6UL, 0x76cfa1ce1124f26bUL, - /* 149 */ 0x18eb00562422abb6UL, 0xf377c4d58f8c29c3UL, - 0x4dbbc207f531561aUL, 0x0253b7f082128a27UL, - /* 150 */ 0x3d1f091cb62c17e0UL, 0x4860e1abd64628a9UL, - 0x52d17436309d4253UL, 0x356f97e13efae576UL, - /* 151 */ 0xd351e11aa150535bUL, 0x3e6b45bb1dd878ccUL, - 0x0c776128bed92c98UL, 0x1d34ae93032885b8UL, - /* 152 */ 0x4ba0488ca85ba4c3UL, 0x985348c33c9ce6ceUL, - 0x66124c6f97bda770UL, 0x0f81a0290654124aUL, - /* 153 */ 0x9ed09ca6569b86fdUL, 0x811009fd18af9a2dUL, - 0xff08d03f93d8c20aUL, 0x52a148199faef26bUL, - /* 154 */ 0x3e03f9dc2d8d1b73UL, 0x4205801873961a70UL, - 0xc0d987f041a35970UL, 0x07aa1f15a1c0d549UL, - /* 155 */ 0xdfd46ce08cd27224UL, 0x6d0a024f934e4239UL, - 0x808a7a6399897b59UL, 0x0a4556e9e13d95a2UL, - /* 156 */ 0xd21a991fe9c13045UL, 0x9b0e8548fe7751b8UL, - 0x5da643cb4bf30035UL, 0x77db28d63940f721UL, - /* 157 */ 0xfc5eeb614adc9011UL, 0x5229419ae8c411ebUL, - 0x9ec3e7787d1dcf74UL, 0x340d053e216e4cb5UL, - /* 158 */ 0xcac7af39b48df2b4UL, 0xc0faec2871a10a94UL, - 0x140a69245ca575edUL, 0x0cf1c37134273a4cUL, - /* 159 */ 0xc8ee306ac224b8a5UL, 0x57eaee7ccb4930b0UL, - 0xa1e806bdaacbe74fUL, 0x7d9a62742eeb657dUL, - /* 160 */ 0x9eb6b6ef546c4830UL, 0x885cca1fddb36e2eUL, - 0xe6b9f383ef0d7105UL, 0x58654fef9d2e0412UL, - /* 161 */ 0xa905c4ffbe0e8e26UL, 0x942de5df9b31816eUL, - 0x497d723f802e88e1UL, 0x30684dea602f408dUL, - /* 162 */ 
0x21e5a278a3e6cb34UL, 0xaefb6e6f5b151dc4UL, - 0xb30b8e049d77ca15UL, 0x28c3c9cf53b98981UL, - /* 163 */ 0x287fb721556cdd2aUL, 0x0d317ca897022274UL, - 0x7468c7423a543258UL, 0x4a7f11464eb5642fUL, - /* 164 */ 0xa237a4774d193aa6UL, 0xd865986ea92129a1UL, - 0x24c515ecf87c1a88UL, 0x604003575f39f5ebUL, - /* 165 */ 0x47b9f189570a9b27UL, 0x2b98cede465e4b78UL, - 0x026df551dbb85c20UL, 0x74fcd91047e21901UL, - /* 166 */ 0x13e2a90a23c1bfa3UL, 0x0cb0074e478519f6UL, - 0x5ff1cbbe3af6cf44UL, 0x67fe5438be812dbeUL, - /* 167 */ 0xd13cf64fa40f05b0UL, 0x054dfb2f32283787UL, - 0x4173915b7f0d2aeaUL, 0x482f144f1f610d4eUL, - /* 168 */ 0xf6210201b47f8234UL, 0x5d0ae1929e70b990UL, - 0xdcd7f455b049567cUL, 0x7e93d0f1f0916f01UL, - /* 169 */ 0xdd79cbf18a7db4faUL, 0xbe8391bf6f74c62fUL, - 0x027145d14b8291bdUL, 0x585a73ea2cbf1705UL, - /* 170 */ 0x485ca03e928a0db2UL, 0x10fc01a5742857e7UL, - 0x2f482edbd6d551a7UL, 0x0f0433b5048fdb8aUL, - /* 171 */ 0x60da2e8dd7dc6247UL, 0x88b4c9d38cd4819aUL, - 0x13033ac001f66697UL, 0x273b24fe3b367d75UL, - /* 172 */ 0xc6e8f66a31b3b9d4UL, 0x281514a494df49d5UL, - 0xd1726fdfc8b23da7UL, 0x4b3ae7d103dee548UL, - /* 173 */ 0xc6256e19ce4b9d7eUL, 0xff5c5cf186e3c61cUL, - 0xacc63ca34b8ec145UL, 0x74621888fee66574UL, - /* 174 */ 0x956f409645290a1eUL, 0xef0bf8e3263a962eUL, - 0xed6a50eb5ec2647bUL, 0x0694283a9dca7502UL, - /* 175 */ 0x769b963643a2dcd1UL, 0x42b7c8ea09fc5353UL, - 0x4f002aee13397eabUL, 0x63005e2c19b7d63aUL, - /* 176 */ 0xca6736da63023beaUL, 0x966c7f6db12a99b7UL, - 0xace09390c537c5e1UL, 0x0b696063a1aa89eeUL, - /* 177 */ 0xebb03e97288c56e5UL, 0x432a9f9f938c8be8UL, - 0xa6a5a93d5b717f71UL, 0x1a5fb4c3e18f9d97UL, - /* 178 */ 0x1c94e7ad1c60cdceUL, 0xee202a43fc02c4a0UL, - 0x8dafe4d867c46a20UL, 0x0a10263c8ac27b58UL, - /* 179 */ 0xd0dea9dfe4432a4aUL, 0x856af87bbe9277c5UL, - 0xce8472acc212c71aUL, 0x6f151b6d9bbb1e91UL, - /* 180 */ 0x26776c527ceed56aUL, 0x7d211cb7fbf8faecUL, - 0x37ae66a6fd4609ccUL, 0x1f81b702d2770c42UL, - /* 181 */ 0x2fb0b057eac58392UL, 0xe1dd89fe29744e9dUL, - 0xc964f8eb17beb4f8UL, 0x29571073c9a2d41eUL, - /* 182 */ 0xa948a18981c0e254UL, 0x2df6369b65b22830UL, - 0xa33eb2d75fcfd3c6UL, 0x078cd6ec4199a01fUL, - /* 183 */ 0x4a584a41ad900d2fUL, 0x32142b78e2c74c52UL, - 0x68c4e8338431c978UL, 0x7f69ea9008689fc2UL, - /* 184 */ 0x52f2c81e46a38265UL, 0xfd78072d04a832fdUL, - 0x8cd7d5fa25359e94UL, 0x4de71b7454cc29d2UL, - /* 185 */ 0x42eb60ad1eda6ac9UL, 0x0aad37dfdbc09c3aUL, - 0x81004b71e33cc191UL, 0x44e6be345122803cUL, - /* 186 */ 0x03fe8388ba1920dbUL, 0xf5d57c32150db008UL, - 0x49c8c4281af60c29UL, 0x21edb518de701aeeUL, - /* 187 */ 0x7fb63e418f06dc99UL, 0xa4460d99c166d7b8UL, - 0x24dd5248ce520a83UL, 0x5ec3ad712b928358UL, - /* 188 */ 0x15022a5fbd17930fUL, 0xa4f64a77d82570e3UL, - 0x12bc8d6915783712UL, 0x498194c0fc620abbUL, - /* 189 */ 0x38a2d9d255686c82UL, 0x785c6bd9193e21f0UL, - 0xe4d5c81ab24a5484UL, 0x56307860b2e20989UL, - /* 190 */ 0x429d55f78b4d74c4UL, 0x22f1834643350131UL, - 0x1e60c24598c71fffUL, 0x59f2f014979983efUL, - /* 191 */ 0x46a47d56eb494a44UL, 0x3e22a854d636a18eUL, - 0xb346e15274491c3bUL, 0x2ceafd4e5390cde7UL, - /* 192 */ 0xba8a8538be0d6675UL, 0x4b9074bb50818e23UL, - 0xcbdab89085d304c3UL, 0x61a24fe0e56192c4UL, - /* 193 */ 0xcb7615e6db525bcbUL, 0xdd7d8c35a567e4caUL, - 0xe6b4153acafcdd69UL, 0x2d668e097f3c9766UL, - /* 194 */ 0xa57e7e265ce55ef0UL, 0x5d9f4e527cd4b967UL, - 0xfbc83606492fd1e5UL, 0x090d52beb7c3f7aeUL, - /* 195 */ 0x09b9515a1e7b4d7cUL, 0x1f266a2599da44c0UL, - 0xa1c49548e2c55504UL, 0x7ef04287126f15ccUL, - /* 196 */ 0xfed1659dbd30ef15UL, 0x8b4ab9eec4e0277bUL, - 0x884d6236a5df3291UL, 
0x1fd96ea6bf5cf788UL, - /* 197 */ 0x42a161981f190d9aUL, 0x61d849507e6052c1UL, - 0x9fe113bf285a2cd5UL, 0x7c22d676dbad85d8UL, - /* 198 */ 0x82e770ed2bfbd27dUL, 0x4c05b2ece996f5a5UL, - 0xcd40a9c2b0900150UL, 0x5895319213d9bf64UL, - /* 199 */ 0xe7cc5d703fea2e08UL, 0xb50c491258e2188cUL, - 0xcce30baa48205bf0UL, 0x537c659ccfa32d62UL, - /* 200 */ 0x37b6623a98cfc088UL, 0xfe9bed1fa4d6aca4UL, - 0x04d29b8e56a8d1b0UL, 0x725f71c40b519575UL, - /* 201 */ 0x28c7f89cd0339ce6UL, 0x8367b14469ddc18bUL, - 0x883ada83a6a1652cUL, 0x585f1974034d6c17UL, - /* 202 */ 0x89cfb266f1b19188UL, 0xe63b4863e7c35217UL, - 0xd88c9da6b4c0526aUL, 0x3e035c9df0954635UL, - /* 203 */ 0xdd9d5412fb45de9dUL, 0xdd684532e4cff40dUL, - 0x4b5c999b151d671cUL, 0x2d8c2cc811e7f690UL, - /* 204 */ 0x7f54be1d90055d40UL, 0xa464c5df464aaf40UL, - 0x33979624f0e917beUL, 0x2c018dc527356b30UL, - /* 205 */ 0xa5415024e330b3d4UL, 0x73ff3d96691652d3UL, - 0x94ec42c4ef9b59f1UL, 0x0747201618d08e5aUL, - /* 206 */ 0x4d6ca48aca411c53UL, 0x66415f2fcfa66119UL, - 0x9c4dd40051e227ffUL, 0x59810bc09a02f7ebUL, - /* 207 */ 0x2a7eb171b3dc101dUL, 0x441c5ab99ffef68eUL, - 0x32025c9b93b359eaUL, 0x5e8ce0a71e9d112fUL, - /* 208 */ 0xbfcccb92429503fdUL, 0xd271ba752f095d55UL, - 0x345ead5e972d091eUL, 0x18c8df11a83103baUL, - /* 209 */ 0x90cd949a9aed0f4cUL, 0xc5d1f4cb6660e37eUL, - 0xb8cac52d56c52e0bUL, 0x6e42e400c5808e0dUL, - /* 210 */ 0xa3b46966eeaefd23UL, 0x0c4f1f0be39ecdcaUL, - 0x189dc8c9d683a51dUL, 0x51f27f054c09351bUL, - /* 211 */ 0x4c487ccd2a320682UL, 0x587ea95bb3df1c96UL, - 0xc8ccf79e555cb8e8UL, 0x547dc829a206d73dUL, - /* 212 */ 0xb822a6cd80c39b06UL, 0xe96d54732000d4c6UL, - 0x28535b6f91463b4dUL, 0x228f4660e2486e1dUL, - /* 213 */ 0x98799538de8d3abfUL, 0x8cd8330045ebca6eUL, - 0x79952a008221e738UL, 0x4322e1a7535cd2bbUL, - /* 214 */ 0xb114c11819d1801cUL, 0x2016e4d84f3f5ec7UL, - 0xdd0e2df409260f4cUL, 0x5ec362c0ae5f7266UL, - /* 215 */ 0xc0462b18b8b2b4eeUL, 0x7cc8d950274d1afbUL, - 0xf25f7105436b02d2UL, 0x43bbf8dcbff9ccd3UL, - /* 216 */ 0xb6ad1767a039e9dfUL, 0xb0714da8f69d3583UL, - 0x5e55fa18b42931f5UL, 0x4ed5558f33c60961UL, - /* 217 */ 0x1fe37901c647a5ddUL, 0x593ddf1f8081d357UL, - 0x0249a4fd813fd7a6UL, 0x69acca274e9caf61UL, - /* 218 */ 0x047ba3ea330721c9UL, 0x83423fc20e7e1ea0UL, - 0x1df4c0af01314a60UL, 0x09a62dab89289527UL, - /* 219 */ 0xa5b325a49cc6cb00UL, 0xe94b5dc654b56cb6UL, - 0x3be28779adc994a0UL, 0x4296e8f8ba3a4aadUL, - /* 220 */ 0x328689761e451eabUL, 0x2e4d598bff59594aUL, - 0x49b96853d7a7084aUL, 0x4980a319601420a8UL, - /* 221 */ 0x9565b9e12f552c42UL, 0x8a5318db7100fe96UL, - 0x05c90b4d43add0d7UL, 0x538b4cd66a5d4edaUL, - /* 222 */ 0xf4e94fc3e89f039fUL, 0x592c9af26f618045UL, - 0x08a36eb5fd4b9550UL, 0x25fffaf6c2ed1419UL, - /* 223 */ 0x34434459cc79d354UL, 0xeeecbfb4b1d5476bUL, - 0xddeb34a061615d99UL, 0x5129cecceb64b773UL, - /* 224 */ 0xee43215894993520UL, 0x772f9c7cf14c0b3bUL, - 0xd2e2fce306bedad5UL, 0x715f42b546f06a97UL, - /* 225 */ 0x434ecdceda5b5f1aUL, 0x0da17115a49741a9UL, - 0x680bd77c73edad2eUL, 0x487c02354edd9041UL, - /* 226 */ 0xb8efeff3a70ed9c4UL, 0x56a32aa3e857e302UL, - 0xdf3a68bd48a2a5a0UL, 0x07f650b73176c444UL, - /* 227 */ 0xe38b9b1626e0ccb1UL, 0x79e053c18b09fb36UL, - 0x56d90319c9f94964UL, 0x1ca941e7ac9ff5c4UL, - /* 228 */ 0x49c4df29162fa0bbUL, 0x8488cf3282b33305UL, - 0x95dfda14cabb437dUL, 0x3391f78264d5ad86UL, - /* 229 */ 0x729ae06ae2b5095dUL, 0xd58a58d73259a946UL, - 0xe9834262d13921edUL, 0x27fedafaa54bb592UL, - /* 230 */ 0xa99dc5b829ad48bbUL, 0x5f025742499ee260UL, - 0x802c8ecd5d7513fdUL, 0x78ceb3ef3f6dd938UL, - /* 231 */ 0xc342f44f8a135d94UL, 0x7b9edb44828cdda3UL, - 
0x9436d11a0537cfe7UL, 0x5064b164ec1ab4c8UL, - /* 232 */ 0x7020eccfd37eb2fcUL, 0x1f31ea3ed90d25fcUL, - 0x1b930d7bdfa1bb34UL, 0x5344467a48113044UL, - /* 233 */ 0x70073170f25e6dfbUL, 0xe385dc1a50114cc8UL, - 0x2348698ac8fc4f00UL, 0x2a77a55284dd40d8UL, - /* 234 */ 0xfe06afe0c98c6ce4UL, 0xc235df96dddfd6e4UL, - 0x1428d01e33bf1ed3UL, 0x785768ec9300bdafUL, - /* 235 */ 0x9702e57a91deb63bUL, 0x61bdb8bfe5ce8b80UL, - 0x645b426f3d1d58acUL, 0x4804a82227a557bcUL, - /* 236 */ 0x8e57048ab44d2601UL, 0x68d6501a4b3a6935UL, - 0xc39c9ec3f9e1c293UL, 0x4172f257d4de63e2UL, - /* 237 */ 0xd368b450330c6401UL, 0x040d3017418f2391UL, - 0x2c34bb6090b7d90dUL, 0x16f649228fdfd51fUL, - /* 238 */ 0xbea6818e2b928ef5UL, 0xe28ccf91cdc11e72UL, - 0x594aaa68e77a36cdUL, 0x313034806c7ffd0fUL, - /* 239 */ 0x8a9d27ac2249bd65UL, 0x19a3b464018e9512UL, - 0xc26ccff352b37ec7UL, 0x056f68341d797b21UL, - /* 240 */ 0x5e79d6757efd2327UL, 0xfabdbcb6553afe15UL, - 0xd3e7222c6eaf5a60UL, 0x7046c76d4dae743bUL, - /* 241 */ 0x660be872b18d4a55UL, 0x19992518574e1496UL, - 0xc103053a302bdcbbUL, 0x3ed8e9800b218e8eUL, - /* 242 */ 0x7b0b9239fa75e03eUL, 0xefe9fb684633c083UL, - 0x98a35fbe391a7793UL, 0x6065510fe2d0fe34UL, - /* 243 */ 0x55cb668548abad0cUL, 0xb4584548da87e527UL, - 0x2c43ecea0107c1ddUL, 0x526028809372de35UL, - /* 244 */ 0x3415c56af9213b1fUL, 0x5bee1a4d017e98dbUL, - 0x13f6b105b5cf709bUL, 0x5ff20e3482b29ab6UL, - /* 245 */ 0x0aa29c75cc2e6c90UL, 0xfc7d73ca3a70e206UL, - 0x899fc38fc4b5c515UL, 0x250386b124ffc207UL, - /* 246 */ 0x54ea28d5ae3d2b56UL, 0x9913149dd6de60ceUL, - 0x16694fc58f06d6c1UL, 0x46b23975eb018fc7UL, - /* 247 */ 0x470a6a0fb4b7b4e2UL, 0x5d92475a8f7253deUL, - 0xabeee5b52fbd3adbUL, 0x7fa20801a0806968UL, - /* 248 */ 0x76f3faf19f7714d2UL, 0xb3e840c12f4660c3UL, - 0x0fb4cd8df212744eUL, 0x4b065a251d3a2dd2UL, - /* 249 */ 0x5cebde383d77cd4aUL, 0x6adf39df882c9cb1UL, - 0xa2dd242eb09af759UL, 0x3147c0e50e5f6422UL, - /* 250 */ 0x164ca5101d1350dbUL, 0xf8d13479c33fc962UL, - 0xe640ce4d13e5da08UL, 0x4bdee0c45061f8baUL, - /* 251 */ 0xd7c46dc1a4edb1c9UL, 0x5514d7b6437fd98aUL, - 0x58942f6bb2a1c00bUL, 0x2dffb2ab1d70710eUL, - /* 252 */ 0xccdfcf2fc18b6d68UL, 0xa8ebcba8b7806167UL, - 0x980697f95e2937e3UL, 0x02fbba1cd0126e8cUL + /* Step 1: Compute carry*38 */ + " mov $0, %%rax;" + " mov $38, %%rcx;" + " cmovc %%rcx, %%rax;" + + /* Step 2: Substract carry*38 from the original difference */ + " sub %%rax, %%r8;" + " sbb $0, %%r9;" + " sbb $0, %%r10;" + " sbb $0, %%r11;" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rcx, %%rax;" + " sub %%rax, %%r8;" + + /* Store the result */ + " movq %%r8, 0(%0);" + " movq %%r9, 8(%0);" + " movq %%r10, 16(%0);" + " movq %%r11, 24(%0);" + : + : "r" (out), "r" (f1), "r" (f2) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc" + ); +} + +/* Computes a field multiplication: out <- f1 * f2 + * Uses the 8-element buffer tmp for intermediate results */ +static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) +{ + asm volatile( + /* Compute the raw multiplication: tmp <- src1 * src2 */ + + /* Compute src1[0] * src2 */ + " movq 0(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);" + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" + /* Compute src1[1] * src2 */ + " movq 8(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 
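/* Editor's note (not part of the patch): a portable C sketch of the fsub()
 * tail above, assuming a compiler with unsigned __int128. Since
 * 2^256 == 38 (mod p) for p = 2^255 - 19, a borrow out of limb 3 is repaired
 * by subtracting 38, and the rare second borrow by one more conditional
 * subtract that can no longer underflow. */
typedef unsigned __int128 u128;	/* local to these sketches */

static void fsub_sketch(u64 out[4], const u64 a[4], const u64 b[4])
{
	u64 borrow = 0;
	u128 d;
	int i;

	/* the 4-limb sub/sbb chain */
	for (i = 0; i < 4; i++) {
		d = (u128)a[i] - b[i] - borrow;
		out[i] = (u64)d;
		borrow = (u64)(d >> 64) & 1;
	}

	/* Steps 1+2: subtract borrow*38 from limb 0, ripple the borrow up */
	d = (u128)out[0] - (borrow ? 38 : 0);
	out[0] = (u64)d;
	borrow = (u64)(d >> 64) & 1;
	for (i = 1; i < 4; i++) {
		d = (u128)out[i] - borrow;
		out[i] = (u64)d;
		borrow = (u64)(d >> 64) & 1;
	}

	/* Step 3: fold a surviving borrow bit back in; cannot underflow */
	out[0] -= borrow ? 38 : 0;
}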
8(%0), %%r8;" " movq %%r8, 8(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);" + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + /* Compute src1[2] * src2 */ + " movq 16(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);" + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + /* Compute src1[3] * src2 */ + " movq 24(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);" + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);" + /* Line up pointers */ + " mov %0, %1;" + " mov %2, %0;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%1), %%r8, %%r13;" + " xor %3, %3;" + " adoxq 0(%1), %%r8;" + " mulxq 40(%1), %%r9, %%r12;" + " adcx %%r13, %%r9;" + " adoxq 8(%1), %%r9;" + " mulxq 48(%1), %%r10, %%r13;" + " adcx %%r12, %%r10;" + " adoxq 16(%1), %%r10;" + " mulxq 56(%1), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%1), %%r11;" + " adcx %3, %%rax;" + " adox %3, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %3, %%r9;" + " movq %%r9, 8(%0);" + " adcx %3, %%r10;" + " movq %%r10, 16(%0);" + " adcx %3, %%r11;" + " movq %%r11, 24(%0);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%0);" + : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2) + : + : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc" + ); +} + +/* Computes two field multiplications: + * out[0] <- f1[0] * f2[0] + * out[1] <- f1[1] * f2[1] + * Uses the 16-element buffer tmp for intermediate results. 
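/* Editor's note (not part of the patch): the "wrap the result back into the
 * field" tail of fmul() above, rendered as portable C under the same identity
 * 2^256 == 38 (mod 2^255 - 19): the high 256 bits of the 512-bit product are
 * worth 38x their face value, and the single overflow limb that this fold
 * produces is folded again the same way. Assumes unsigned __int128. */
static void fold_512_sketch(u64 out[4], const u64 t[8])
{
	u128 acc;
	u64 carry = 0;
	int i;

	/* Step 1: out = t_lo + 38 * t_hi; the per-limb carry stays below 39 */
	for (i = 0; i < 4; i++) {
		acc = (u128)t[i] + (u128)t[i + 4] * 38 + carry;
		out[i] = (u64)acc;
		carry = (u64)(acc >> 64);
	}

	/* Step 2: the spilled top limb re-enters as carry*38 at limb 0 */
	acc = (u128)out[0] + carry * 38;
	out[0] = (u64)acc;
	carry = (u64)(acc >> 64);
	for (i = 1; i < 4; i++) {
		acc = (u128)out[i] + carry;
		out[i] = (u64)acc;
		carry = (u64)(acc >> 64);
	}

	/* Step 3: a last carry bit is one more 2^256, i.e. one more +38,
	 * which by the ranges above cannot carry again. */
	out[0] += carry * 38;
}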
*/ +static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) +{ + asm volatile( + /* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */ + + /* Compute src1[0] * src2 */ + " movq 0(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);" + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" + /* Compute src1[1] * src2 */ + " movq 8(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);" + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + /* Compute src1[2] * src2 */ + " movq 16(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);" + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + /* Compute src1[3] * src2 */ + " movq 24(%1), %%rdx;" + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);" + " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;" + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);" + + /* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */ + + /* Compute src1[0] * src2 */ + " movq 32(%1), %%rdx;" + " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);" + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);" + " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" + " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" + /* Compute src1[1] * src2 */ + " movq 40(%1), %%rdx;" + " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);" + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 80(%0);" + " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" + " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + /* Compute src1[2] * src2 */ + " movq 48(%1), %%rdx;" + " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);" + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 88(%0);" + " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" + " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + /* Compute src1[3] * src2 */ + " movq 56(%1), %%rdx;" + " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 
88(%0), %%r8;" " movq %%r8, 88(%0);" + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 96(%0);" + " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 104(%0);" " mov $0, %%r8;" + " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;" + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);" + /* Line up pointers */ + " mov %0, %1;" + " mov %2, %0;" + + /* Wrap the results back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%1), %%r8, %%r13;" + " xor %3, %3;" + " adoxq 0(%1), %%r8;" + " mulxq 40(%1), %%r9, %%r12;" + " adcx %%r13, %%r9;" + " adoxq 8(%1), %%r9;" + " mulxq 48(%1), %%r10, %%r13;" + " adcx %%r12, %%r10;" + " adoxq 16(%1), %%r10;" + " mulxq 56(%1), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%1), %%r11;" + " adcx %3, %%rax;" + " adox %3, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %3, %%r9;" + " movq %%r9, 8(%0);" + " adcx %3, %%r10;" + " movq %%r10, 16(%0);" + " adcx %3, %%r11;" + " movq %%r11, 24(%0);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%0);" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 96(%1), %%r8, %%r13;" + " xor %3, %3;" + " adoxq 64(%1), %%r8;" + " mulxq 104(%1), %%r9, %%r12;" + " adcx %%r13, %%r9;" + " adoxq 72(%1), %%r9;" + " mulxq 112(%1), %%r10, %%r13;" + " adcx %%r12, %%r10;" + " adoxq 80(%1), %%r10;" + " mulxq 120(%1), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 88(%1), %%r11;" + " adcx %3, %%rax;" + " adox %3, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %3, %%r9;" + " movq %%r9, 40(%0);" + " adcx %3, %%r10;" + " movq %%r10, 48(%0);" + " adcx %3, %%r11;" + " movq %%r11, 56(%0);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 32(%0);" + : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2) + : + : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc" + ); +} + +/* Computes the field multiplication of four-element f1 with value in f2 */ +static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2) +{ + register u64 f2_r asm("rdx") = f2; + + asm volatile( + /* Compute the raw multiplication of f1*f2 */ + " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */ + " mulxq 8(%2), %%r9, %%r12;" /* f1[1]*f2 */ + " add %%rcx, %%r9;" + " mov $0, %%rcx;" + " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */ + " adcx %%r12, %%r10;" + " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */ + " adcx %%r13, %%r11;" + " adcx %%rcx, %%rax;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute carry*38 */ + " mov $38, %%rdx;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r" (f2_r) + : "r" (out), "r" (f1) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", 
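/* Editor's note (not part of the patch): per its doc comment, fmul2() above
 * is semantically just two independent fmul() calls on the lower and upper
 * 4-limb halves; fusing them into one asm block lets the two schoolbook
 * passes and the two 38-fold reductions share setup and keep the
 * mulx/adcx/adox ports busy. A reference decomposition: */
static inline void fmul2_ref(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
{
	fmul(out,     f1,     f2,     tmp);	/* out[0] <- f1[0] * f2[0] */
	fmul(out + 4, f1 + 4, f2 + 4, tmp + 8);	/* out[1] <- f1[1] * f2[1] */
}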
"%r12", "%r13", "memory", "cc" + ); +} + +/* Computes p1 <- bit ? p2 : p1 in constant time */ +static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2) +{ + asm volatile( + /* Invert the polarity of bit to match cmov expectations */ + " add $18446744073709551615, %0;" + + /* cswap p1[0], p2[0] */ + " movq 0(%1), %%r8;" + " movq 0(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 0(%1);" + " movq %%r9, 0(%2);" + + /* cswap p1[1], p2[1] */ + " movq 8(%1), %%r8;" + " movq 8(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 8(%1);" + " movq %%r9, 8(%2);" + + /* cswap p1[2], p2[2] */ + " movq 16(%1), %%r8;" + " movq 16(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 16(%1);" + " movq %%r9, 16(%2);" + + /* cswap p1[3], p2[3] */ + " movq 24(%1), %%r8;" + " movq 24(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 24(%1);" + " movq %%r9, 24(%2);" + + /* cswap p1[4], p2[4] */ + " movq 32(%1), %%r8;" + " movq 32(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 32(%1);" + " movq %%r9, 32(%2);" + + /* cswap p1[5], p2[5] */ + " movq 40(%1), %%r8;" + " movq 40(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 40(%1);" + " movq %%r9, 40(%2);" + + /* cswap p1[6], p2[6] */ + " movq 48(%1), %%r8;" + " movq 48(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 48(%1);" + " movq %%r9, 48(%2);" + + /* cswap p1[7], p2[7] */ + " movq 56(%1), %%r8;" + " movq 56(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 56(%1);" + " movq %%r9, 56(%2);" + : "+&r" (bit) + : "r" (p1), "r" (p2) + : "%r8", "%r9", "%r10", "memory", "cc" + ); +} + +/* Computes the square of a field element: out <- f * f + * Uses the 8-element buffer tmp for intermediate results */ +static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) +{ + asm volatile( + /* Compute the raw multiplication: tmp <- f * f */ + + /* Step 1: Compute all partial products */ + " movq 0(%1), %%rdx;" /* f[0] */ + " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ + " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 24(%1), %%rdx;" /* f[3] */ + " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ + " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ + " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15, %%r15;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%r12;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%r12, %%r12;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 0(%0);" + " add %%rcx, %%r8;" " movq %%r8, 8(%0);" + " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" " movq %%r9, 16(%0);" + " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);" + " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" " movq %%r11, 
32(%0);" + " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);" + " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" " movq %%r13, 48(%0);" + " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);" + + /* Line up pointers */ + " mov %0, %1;" + " mov %2, %0;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%1), %%r8, %%r13;" + " xor %%rcx, %%rcx;" + " adoxq 0(%1), %%r8;" + " mulxq 40(%1), %%r9, %%r12;" + " adcx %%r13, %%r9;" + " adoxq 8(%1), %%r9;" + " mulxq 48(%1), %%r10, %%r13;" + " adcx %%r12, %%r10;" + " adoxq 16(%1), %%r10;" + " mulxq 56(%1), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%1), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%0);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%0);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%0);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%0);" + : "+&r" (tmp), "+&r" (f), "+&r" (out) + : + : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc" + ); +} + +/* Computes two field squarings: + * out[0] <- f[0] * f[0] + * out[1] <- f[1] * f[1] + * Uses the 16-element buffer tmp for intermediate results */ +static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) +{ + asm volatile( + /* Step 1: Compute all partial products */ + " movq 0(%1), %%rdx;" /* f[0] */ + " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ + " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 24(%1), %%rdx;" /* f[3] */ + " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ + " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ + " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15, %%r15;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%r12;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%r12, %%r12;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 0(%0);" + " add %%rcx, %%r8;" " movq %%r8, 8(%0);" + " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" " movq %%r9, 16(%0);" + " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);" + " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" " movq %%r11, 32(%0);" + " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);" + " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" " movq %%r13, 48(%0);" + " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);" + + /* Step 1: Compute all partial products */ + " movq 32(%1), %%rdx;" /* f[0] */ + " mulxq 40(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ + " mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 56(%1), %%rdx;" /* f[3] */ + " mulxq 40(%1), %%r11, %%r12;" " adcx 
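/* Editor's note (not part of the patch): the three steps of fsqr() above
 * follow from expanding the square of a 4-limb value:
 *
 *   (sum_i f[i] 2^(64i))^2 = sum_i f[i]^2 2^(128i)
 *                          + 2 * sum_{i<j} f[i] f[j] 2^(64(i+j))
 *
 * so each cross product is formed once (step 1), the whole row is doubled
 * with an add-to-self carry chain (step 2), and the diagonal squares land on
 * even limb positions (step 3), before the same 38-fold reduction as fmul().
 * A portable sketch of the raw square, assuming unsigned __int128: */
static void fsqr_raw_sketch(u64 t[8], const u64 f[4])
{
	u128 acc;
	u64 carry;
	int i, j;

	for (i = 0; i < 8; i++)
		t[i] = 0;

	/* Step 1: off-diagonal products f[i]*f[j], i < j, each formed once */
	for (i = 0; i < 4; i++) {
		carry = 0;
		for (j = i + 1; j < 4; j++) {
			acc = (u128)f[i] * f[j] + t[i + j] + carry;
			t[i + j] = (u64)acc;
			carry = (u64)(acc >> 64);
		}
		t[i + 4] = carry;
	}

	/* Step 2: double all cross terms in one carry chain */
	carry = 0;
	for (i = 0; i < 8; i++) {
		acc = (u128)t[i] + t[i] + carry;
		t[i] = (u64)acc;
		carry = (u64)(acc >> 64);
	}

	/* Step 3: add f[i]^2 into limbs 2i and 2i+1 */
	carry = 0;
	for (i = 0; i < 4; i++) {
		u128 sq = (u128)f[i] * f[i];

		acc = (u128)t[2 * i] + (u64)sq + carry;
		t[2 * i] = (u64)acc;
		carry = (u64)(acc >> 64);
		acc = (u128)t[2 * i + 1] + (u64)(sq >> 64) + carry;
		t[2 * i + 1] = (u64)acc;
		carry = (u64)(acc >> 64);
	}
}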
%%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ + " movq 40(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ + " mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15, %%r15;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%r12;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%r12, %%r12;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 32(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 64(%0);" + " add %%rcx, %%r8;" " movq %%r8, 72(%0);" + " movq 40(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" " movq %%r9, 80(%0);" + " adcx %%rcx, %%r10;" " movq %%r10, 88(%0);" + " movq 48(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" " movq %%r11, 96(%0);" + " adcx %%rcx, %%r12;" " movq %%r12, 104(%0);" + " movq 56(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" " movq %%r13, 112(%0);" + " adcx %%rcx, %%r14;" " movq %%r14, 120(%0);" + + /* Line up pointers */ + " mov %0, %1;" + " mov %2, %0;" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%1), %%r8, %%r13;" + " xor %%rcx, %%rcx;" + " adoxq 0(%1), %%r8;" + " mulxq 40(%1), %%r9, %%r12;" + " adcx %%r13, %%r9;" + " adoxq 8(%1), %%r9;" + " mulxq 48(%1), %%r10, %%r13;" + " adcx %%r12, %%r10;" + " adoxq 16(%1), %%r10;" + " mulxq 56(%1), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%1), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%0);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%0);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%0);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%0);" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 96(%1), %%r8, %%r13;" + " xor %%rcx, %%rcx;" + " adoxq 64(%1), %%r8;" + " mulxq 104(%1), %%r9, %%r12;" + " adcx %%r13, %%r9;" + " adoxq 72(%1), %%r9;" + " mulxq 112(%1), %%r10, %%r13;" + " adcx %%r12, %%r10;" + " adoxq 80(%1), %%r10;" + " mulxq 120(%1), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 88(%1), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 40(%0);" + " adcx %%rcx, %%r10;" + " movq %%r10, 48(%0);" + " adcx %%rcx, %%r11;" + " movq %%r11, 56(%0);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 32(%0);" + : "+&r" (tmp), "+&r" (f), "+&r" (out) + : + : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc" + ); +} + +static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2) +{ + u64 *nq = p01_tmp1; + u64 *nq_p1 = p01_tmp1 + (u32)8U; + u64 *tmp1 = p01_tmp1 + (u32)16U; + u64 *x1 = q; + u64 *x2 = nq; + u64 *z2 = nq + (u32)4U; + u64 *z3 = nq_p1 + (u32)4U; + u64 *a = tmp1; + u64 *b = tmp1 + (u32)4U; + u64 *ab = tmp1; + u64 *dc = tmp1 + (u32)8U; + u64 
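/* Editor's note (not part of the patch): as with fmul2(), fsqr2() above is
 * two independent fsqr() calls fused into a single asm block: */
static inline void fsqr2_ref(u64 *out, const u64 *f, u64 *tmp)
{
	fsqr(out,     f,     tmp);
	fsqr(out + 4, f + 4, tmp + 8);
}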
*x3; + u64 *z31; + u64 *d0; + u64 *c0; + u64 *a1; + u64 *b1; + u64 *d; + u64 *c; + u64 *ab1; + u64 *dc1; + fadd(a, x2, z2); + fsub(b, x2, z2); + x3 = nq_p1; + z31 = nq_p1 + (u32)4U; + d0 = dc; + c0 = dc + (u32)4U; + fadd(c0, x3, z31); + fsub(d0, x3, z31); + fmul2(dc, dc, ab, tmp2); + fadd(x3, d0, c0); + fsub(z31, d0, c0); + a1 = tmp1; + b1 = tmp1 + (u32)4U; + d = tmp1 + (u32)8U; + c = tmp1 + (u32)12U; + ab1 = tmp1; + dc1 = tmp1 + (u32)8U; + fsqr2(dc1, ab1, tmp2); + fsqr2(nq_p1, nq_p1, tmp2); + a1[0U] = c[0U]; + a1[1U] = c[1U]; + a1[2U] = c[2U]; + a1[3U] = c[3U]; + fsub(c, d, c); + fmul_scalar(b1, c, (u64)121665U); + fadd(b1, b1, d); + fmul2(nq, dc1, ab1, tmp2); + fmul(z3, z3, x1, tmp2); +} + +static void point_double(u64 *nq, u64 *tmp1, u64 *tmp2) +{ + u64 *x2 = nq; + u64 *z2 = nq + (u32)4U; + u64 *a = tmp1; + u64 *b = tmp1 + (u32)4U; + u64 *d = tmp1 + (u32)8U; + u64 *c = tmp1 + (u32)12U; + u64 *ab = tmp1; + u64 *dc = tmp1 + (u32)8U; + fadd(a, x2, z2); + fsub(b, x2, z2); + fsqr2(dc, ab, tmp2); + a[0U] = c[0U]; + a[1U] = c[1U]; + a[2U] = c[2U]; + a[3U] = c[3U]; + fsub(c, d, c); + fmul_scalar(b, c, (u64)121665U); + fadd(b, b, d); + fmul2(nq, dc, ab, tmp2); +} + +static void montgomery_ladder(u64 *out, const u8 *key, u64 *init1) +{ + u64 tmp2[16U] = { 0U }; + u64 p01_tmp1_swap[33U] = { 0U }; + u64 *p0 = p01_tmp1_swap; + u64 *p01 = p01_tmp1_swap; + u64 *p03 = p01; + u64 *p11 = p01 + (u32)8U; + u64 *x0; + u64 *z0; + u64 *p01_tmp1; + u64 *p01_tmp11; + u64 *nq10; + u64 *nq_p11; + u64 *swap1; + u64 sw0; + u64 *nq1; + u64 *tmp1; + memcpy(p11, init1, (u32)8U * sizeof(init1[0U])); + x0 = p03; + z0 = p03 + (u32)4U; + x0[0U] = (u64)1U; + x0[1U] = (u64)0U; + x0[2U] = (u64)0U; + x0[3U] = (u64)0U; + z0[0U] = (u64)0U; + z0[1U] = (u64)0U; + z0[2U] = (u64)0U; + z0[3U] = (u64)0U; + p01_tmp1 = p01_tmp1_swap; + p01_tmp11 = p01_tmp1_swap; + nq10 = p01_tmp1_swap; + nq_p11 = p01_tmp1_swap + (u32)8U; + swap1 = p01_tmp1_swap + (u32)32U; + cswap2((u64)1U, nq10, nq_p11); + point_add_and_double(init1, p01_tmp11, tmp2); + swap1[0U] = (u64)1U; + { + u32 i; + for (i = (u32)0U; i < (u32)251U; i = i + (u32)1U) { + u64 *p01_tmp12 = p01_tmp1_swap; + u64 *swap2 = p01_tmp1_swap + (u32)32U; + u64 *nq2 = p01_tmp12; + u64 *nq_p12 = p01_tmp12 + (u32)8U; + u64 bit = (u64)(key[((u32)253U - i) / (u32)8U] >> ((u32)253U - i) % (u32)8U & (u8)1U); + u64 sw = swap2[0U] ^ bit; + cswap2(sw, nq2, nq_p12); + point_add_and_double(init1, p01_tmp12, tmp2); + swap2[0U] = bit; + } + } + sw0 = swap1[0U]; + cswap2(sw0, nq10, nq_p11); + nq1 = p01_tmp1; + tmp1 = p01_tmp1 + (u32)16U; + point_double(nq1, tmp1, tmp2); + point_double(nq1, tmp1, tmp2); + point_double(nq1, tmp1, tmp2); + memcpy(out, p0, (u32)8U * sizeof(p0[0U])); + + memzero_explicit(tmp2, sizeof(tmp2)); + memzero_explicit(p01_tmp1_swap, sizeof(p01_tmp1_swap)); +} + +static void fsquare_times(u64 *o, const u64 *inp, u64 *tmp, u32 n1) +{ + u32 i; + fsqr(o, inp, tmp); + for (i = (u32)0U; i < n1 - (u32)1U; i = i + (u32)1U) + fsqr(o, o, tmp); +} + +static void finv(u64 *o, const u64 *i, u64 *tmp) +{ + u64 t1[16U] = { 0U }; + u64 *a0 = t1; + u64 *b = t1 + (u32)4U; + u64 *c = t1 + (u32)8U; + u64 *t00 = t1 + (u32)12U; + u64 *tmp1 = tmp; + u64 *a; + u64 *t0; + fsquare_times(a0, i, tmp1, (u32)1U); + fsquare_times(t00, a0, tmp1, (u32)2U); + fmul(b, t00, i, tmp); + fmul(a0, b, a0, tmp); + fsquare_times(t00, a0, tmp1, (u32)1U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)5U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)10U); + fmul(c, t00, b, tmp); + fsquare_times(t00, c, 
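/* Editor's note (not part of the patch): montgomery_ladder() above walks the
 * scalar from bit 253 down to bit 3 (the three low bits, zero after the
 * usual X25519 clamping done by the caller, are finished off by the
 * point_double() calls, and bit 254, which clamping forces to 1, by the
 * unconditional first step). Rather than swapping before and after every
 * ladder step, it remembers the previous bit in swap1[0] and issues a single
 * cswap2(prev ^ bit, ...) per round, plus one compensating swap after the
 * loop. A sketch of just that bookkeeping, with hypothetical names: */
static u64 ladder_swap_args(const u8 key[32], u64 swap_arg[251])
{
	u64 prev = 1;	/* swap1[0] after the unconditional first step */
	u32 i;

	for (i = 0; i < 251; i++) {
		u32 k = 253 - i;
		u64 bit = (u64)((key[k / 8] >> (k % 8)) & 1);

		swap_arg[i] = prev ^ bit; /* argument to round i's cswap2() */
		prev = bit;
	}
	return prev;	/* argument of the final compensating cswap2() */
}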
tmp1, (u32)20U); + fmul(t00, t00, c, tmp); + fsquare_times(t00, t00, tmp1, (u32)10U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)50U); + fmul(c, t00, b, tmp); + fsquare_times(t00, c, tmp1, (u32)100U); + fmul(t00, t00, c, tmp); + fsquare_times(t00, t00, tmp1, (u32)50U); + fmul(t00, t00, b, tmp); + fsquare_times(t00, t00, tmp1, (u32)5U); + a = t1; + t0 = t1 + (u32)12U; + fmul(o, t0, a, tmp); +} + +static void store_felem(u64 *b, u64 *f) +{ + u64 f30 = f[3U]; + u64 top_bit0 = f30 >> (u32)63U; + u64 carry0; + u64 f31; + u64 top_bit; + u64 carry; + u64 f0; + u64 f1; + u64 f2; + u64 f3; + u64 m0; + u64 m1; + u64 m2; + u64 m3; + u64 mask; + u64 f0_; + u64 f1_; + u64 f2_; + u64 f3_; + u64 o0; + u64 o1; + u64 o2; + u64 o3; + f[3U] = f30 & (u64)0x7fffffffffffffffU; + carry0 = add_scalar(f, f, (u64)19U * top_bit0); + f31 = f[3U]; + top_bit = f31 >> (u32)63U; + f[3U] = f31 & (u64)0x7fffffffffffffffU; + carry = add_scalar(f, f, (u64)19U * top_bit); + f0 = f[0U]; + f1 = f[1U]; + f2 = f[2U]; + f3 = f[3U]; + m0 = gte_mask(f0, (u64)0xffffffffffffffedU); + m1 = eq_mask(f1, (u64)0xffffffffffffffffU); + m2 = eq_mask(f2, (u64)0xffffffffffffffffU); + m3 = eq_mask(f3, (u64)0x7fffffffffffffffU); + mask = ((m0 & m1) & m2) & m3; + f0_ = f0 - (mask & (u64)0xffffffffffffffedU); + f1_ = f1 - (mask & (u64)0xffffffffffffffffU); + f2_ = f2 - (mask & (u64)0xffffffffffffffffU); + f3_ = f3 - (mask & (u64)0x7fffffffffffffffU); + o0 = f0_; + o1 = f1_; + o2 = f2_; + o3 = f3_; + b[0U] = o0; + b[1U] = o1; + b[2U] = o2; + b[3U] = o3; +} + +static void encode_point(u8 *o, const u64 *i) +{ + const u64 *x = i; + const u64 *z = i + (u32)4U; + u64 tmp[4U] = { 0U }; + u64 tmp_w[16U] = { 0U }; + finv(tmp, z, tmp_w); + fmul(tmp, tmp, x, tmp_w); + store_felem((u64 *)o, tmp); +} + +static void curve25519_ever64(u8 *out, const u8 *priv, const u8 *pub) +{ + u64 init1[8U] = { 0U }; + u64 tmp[4U] = { 0U }; + u64 tmp3; + u64 *x; + u64 *z; + { + u32 i; + for (i = (u32)0U; i < (u32)4U; i = i + (u32)1U) { + u64 *os = tmp; + const u8 *bj = pub + i * (u32)8U; + u64 u = *(u64 *)bj; + u64 r = u; + u64 x0 = r; + os[i] = x0; + } + } + tmp3 = tmp[3U]; + tmp[3U] = tmp3 & (u64)0x7fffffffffffffffU; + x = init1; + z = init1 + (u32)4U; + z[0U] = (u64)1U; + z[1U] = (u64)0U; + z[2U] = (u64)0U; + z[3U] = (u64)0U; + x[0U] = tmp[0U]; + x[1U] = tmp[1U]; + x[2U] = tmp[2U]; + x[3U] = tmp[3U]; + montgomery_ladder(init1, priv, init1); + encode_point(out, init1); +} + +/* The below constants were generated using this sage script: + * + * #!/usr/bin/env sage + * import sys + * from sage.all import * + * def limbs(n): + * n = int(n) + * l = ((n >> 0) % 2^64, (n >> 64) % 2^64, (n >> 128) % 2^64, (n >> 192) % 2^64) + * return "0x%016xULL, 0x%016xULL, 0x%016xULL, 0x%016xULL" % l + * ec = EllipticCurve(GF(2^255 - 19), [0, 486662, 0, 1, 0]) + * p_minus_s = (ec.lift_x(9) - ec.lift_x(1))[0] + * print("static const u64 p_minus_s[] = { %s };\n" % limbs(p_minus_s)) + * print("static const u64 table_ladder[] = {") + * p = ec.lift_x(9) + * for i in range(252): + * l = (p[0] + p[2]) / (p[0] - p[2]) + * print(("\t%s" + ("," if i != 251 else "")) % limbs(l)) + * p = p * 2 + * print("};") + * + */ + +static const u64 p_minus_s[] = { 0x816b1e0137d48290ULL, 0x440f6a51eb4d1207ULL, 0x52385f46dca2b71dULL, 0x215132111d8354cbULL }; + +static const u64 table_ladder[] = { + 0xfffffffffffffff3ULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x5fffffffffffffffULL, + 0x6b8220f416aafe96ULL, 0x82ebeb2b4f566a34ULL, 0xd5a9a5b075a5950fULL, 0x5142b2cf4b2488f4ULL, + 0x6aaebc750069680cULL, 
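/* Editor's note (not part of the patch): finv() above is Fermat inversion,
 * z^(p-2) mod p with p = 2^255 - 19, via the standard addition chain of 254
 * squarings and 11 multiplications, and store_felem() then forces the unique
 * representative in [0, p). A compact sketch of the latter, assuming the
 * eq_mask()/gte_mask() helpers this file already uses and unsigned
 * __int128 support: */
static void store_felem_sketch(u64 b[4], u64 f[4])
{
	static const u64 p[4] = {
		0xffffffffffffffedULL, 0xffffffffffffffffULL,
		0xffffffffffffffffULL, 0x7fffffffffffffffULL
	};
	u64 mask;
	int n, i;

	/* fold bit 255 twice: 2^255 == 19 (mod p) */
	for (n = 0; n < 2; n++) {
		u64 carry = 19 * (f[3] >> 63);

		f[3] &= 0x7fffffffffffffffULL;
		for (i = 0; i < 4; i++) {
			u128 acc = (u128)f[i] + carry;

			f[i] = (u64)acc;
			carry = (u64)(acc >> 64);
		}
	}

	/* now f < 2^255; subtract p once, branch-free, iff f >= p */
	mask = gte_mask(f[0], p[0]) & eq_mask(f[1], p[1]) &
	       eq_mask(f[2], p[2]) & eq_mask(f[3], p[3]);
	for (i = 0; i < 4; i++)
		b[i] = f[i] - (mask & p[i]);
}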
0x89cf7820a0f99c41ULL, 0x2a58d9183b56d0f4ULL, 0x4b5aca80e36011a4ULL, + 0x329132348c29745dULL, 0xf4a2e616e1642fd7ULL, 0x1e45bb03ff67bc34ULL, 0x306912d0f42a9b4aULL, + 0xff886507e6af7154ULL, 0x04f50e13dfeec82fULL, 0xaa512fe82abab5ceULL, 0x174e251a68d5f222ULL, + 0xcf96700d82028898ULL, 0x1743e3370a2c02c5ULL, 0x379eec98b4e86eaaULL, 0x0c59888a51e0482eULL, + 0xfbcbf1d699b5d189ULL, 0xacaef0d58e9fdc84ULL, 0xc1c20d06231f7614ULL, 0x2938218da274f972ULL, + 0xf6af49beff1d7f18ULL, 0xcc541c22387ac9c2ULL, 0x96fcc9ef4015c56bULL, 0x69c1627c690913a9ULL, + 0x7a86fd2f4733db0eULL, 0xfdb8c4f29e087de9ULL, 0x095e4b1a8ea2a229ULL, 0x1ad7a7c829b37a79ULL, + 0x342d89cad17ea0c0ULL, 0x67bedda6cced2051ULL, 0x19ca31bf2bb42f74ULL, 0x3df7b4c84980acbbULL, + 0xa8c6444dc80ad883ULL, 0xb91e440366e3ab85ULL, 0xc215cda00164f6d8ULL, 0x3d867c6ef247e668ULL, + 0xc7dd582bcc3e658cULL, 0xfd2c4748ee0e5528ULL, 0xa0fd9b95cc9f4f71ULL, 0x7529d871b0675ddfULL, + 0xb8f568b42d3cbd78ULL, 0x1233011b91f3da82ULL, 0x2dce6ccd4a7c3b62ULL, 0x75e7fc8e9e498603ULL, + 0x2f4f13f1fcd0b6ecULL, 0xf1a8ca1f29ff7a45ULL, 0xc249c1a72981e29bULL, 0x6ebe0dbb8c83b56aULL, + 0x7114fa8d170bb222ULL, 0x65a2dcd5bf93935fULL, 0xbdc41f68b59c979aULL, 0x2f0eef79a2ce9289ULL, + 0x42ecbf0c083c37ceULL, 0x2930bc09ec496322ULL, 0xf294b0c19cfeac0dULL, 0x3780aa4bedfabb80ULL, + 0x56c17d3e7cead929ULL, 0xe7cb4beb2e5722c5ULL, 0x0ce931732dbfe15aULL, 0x41b883c7621052f8ULL, + 0xdbf75ca0c3d25350ULL, 0x2936be086eb1e351ULL, 0xc936e03cb4a9b212ULL, 0x1d45bf82322225aaULL, + 0xe81ab1036a024cc5ULL, 0xe212201c304c9a72ULL, 0xc5d73fba6832b1fcULL, 0x20ffdb5a4d839581ULL, + 0xa283d367be5d0fadULL, 0x6c2b25ca8b164475ULL, 0x9d4935467caaf22eULL, 0x5166408eee85ff49ULL, + 0x3c67baa2fab4e361ULL, 0xb3e433c67ef35cefULL, 0x5259729241159b1cULL, 0x6a621892d5b0ab33ULL, + 0x20b74a387555cdcbULL, 0x532aa10e1208923fULL, 0xeaa17b7762281dd1ULL, 0x61ab3443f05c44bfULL, + 0x257a6c422324def8ULL, 0x131c6c1017e3cf7fULL, 0x23758739f630a257ULL, 0x295a407a01a78580ULL, + 0xf8c443246d5da8d9ULL, 0x19d775450c52fa5dULL, 0x2afcfc92731bf83dULL, 0x7d10c8e81b2b4700ULL, + 0xc8e0271f70baa20bULL, 0x993748867ca63957ULL, 0x5412efb3cb7ed4bbULL, 0x3196d36173e62975ULL, + 0xde5bcad141c7dffcULL, 0x47cc8cd2b395c848ULL, 0xa34cd942e11af3cbULL, 0x0256dbf2d04ecec2ULL, + 0x875ab7e94b0e667fULL, 0xcad4dd83c0850d10ULL, 0x47f12e8f4e72c79fULL, 0x5f1a87bb8c85b19bULL, + 0x7ae9d0b6437f51b8ULL, 0x12c7ce5518879065ULL, 0x2ade09fe5cf77aeeULL, 0x23a05a2f7d2c5627ULL, + 0x5908e128f17c169aULL, 0xf77498dd8ad0852dULL, 0x74b4c4ceab102f64ULL, 0x183abadd10139845ULL, + 0xb165ba8daa92aaacULL, 0xd5c5ef9599386705ULL, 0xbe2f8f0cf8fc40d1ULL, 0x2701e635ee204514ULL, + 0x629fa80020156514ULL, 0xf223868764a8c1ceULL, 0x5b894fff0b3f060eULL, 0x60d9944cf708a3faULL, + 0xaeea001a1c7a201fULL, 0xebf16a633ee2ce63ULL, 0x6f7709594c7a07e1ULL, 0x79b958150d0208cbULL, + 0x24b55e5301d410e7ULL, 0xe3a34edff3fdc84dULL, 0xd88768e4904032d8ULL, 0x131384427b3aaeecULL, + 0x8405e51286234f14ULL, 0x14dc4739adb4c529ULL, 0xb8a2b5b250634ffdULL, 0x2fe2a94ad8a7ff93ULL, + 0xec5c57efe843faddULL, 0x2843ce40f0bb9918ULL, 0xa4b561d6cf3d6305ULL, 0x743629bde8fb777eULL, + 0x343edd46bbaf738fULL, 0xed981828b101a651ULL, 0xa401760b882c797aULL, 0x1fc223e28dc88730ULL, + 0x48604e91fc0fba0eULL, 0xb637f78f052c6fa4ULL, 0x91ccac3d09e9239cULL, 0x23f7eed4437a687cULL, + 0x5173b1118d9bd800ULL, 0x29d641b63189d4a7ULL, 0xfdbf177988bbc586ULL, 0x2959894fcad81df5ULL, + 0xaebc8ef3b4bbc899ULL, 0x4148995ab26992b9ULL, 0x24e20b0134f92cfbULL, 0x40d158894a05dee8ULL, + 0x46b00b1185af76f6ULL, 0x26bac77873187a79ULL, 0x3dc0bf95ab8fff5fULL, 0x2a608bd8945524d7ULL, + 
0x26449588bd446302ULL, 0x7c4bc21c0388439cULL, 0x8e98a4f383bd11b2ULL, 0x26218d7bc9d876b9ULL, + 0xe3081542997c178aULL, 0x3c2d29a86fb6606fULL, 0x5c217736fa279374ULL, 0x7dde05734afeb1faULL, + 0x3bf10e3906d42babULL, 0xe4f7803e1980649cULL, 0xe6053bf89595bf7aULL, 0x394faf38da245530ULL, + 0x7a8efb58896928f4ULL, 0xfbc778e9cc6a113cULL, 0x72670ce330af596fULL, 0x48f222a81d3d6cf7ULL, + 0xf01fce410d72caa7ULL, 0x5a20ecc7213b5595ULL, 0x7bc21165c1fa1483ULL, 0x07f89ae31da8a741ULL, + 0x05d2c2b4c6830ff9ULL, 0xd43e330fc6316293ULL, 0xa5a5590a96d3a904ULL, 0x705edb91a65333b6ULL, + 0x048ee15e0bb9a5f7ULL, 0x3240cfca9e0aaf5dULL, 0x8f4b71ceedc4a40bULL, 0x621c0da3de544a6dULL, + 0x92872836a08c4091ULL, 0xce8375b010c91445ULL, 0x8a72eb524f276394ULL, 0x2667fcfa7ec83635ULL, + 0x7f4c173345e8752aULL, 0x061b47feee7079a5ULL, 0x25dd9afa9f86ff34ULL, 0x3780cef5425dc89cULL, + 0x1a46035a513bb4e9ULL, 0x3e1ef379ac575adaULL, 0xc78c5f1c5fa24b50ULL, 0x321a967634fd9f22ULL, + 0x946707b8826e27faULL, 0x3dca84d64c506fd0ULL, 0xc189218075e91436ULL, 0x6d9284169b3b8484ULL, + 0x3a67e840383f2ddfULL, 0x33eec9a30c4f9b75ULL, 0x3ec7c86fa783ef47ULL, 0x26ec449fbac9fbc4ULL, + 0x5c0f38cba09b9e7dULL, 0x81168cc762a3478cULL, 0x3e23b0d306fc121cULL, 0x5a238aa0a5efdcddULL, + 0x1ba26121c4ea43ffULL, 0x36f8c77f7c8832b5ULL, 0x88fbea0b0adcf99aULL, 0x5ca9938ec25bebf9ULL, + 0xd5436a5e51fccda0ULL, 0x1dbc4797c2cd893bULL, 0x19346a65d3224a08ULL, 0x0f5034e49b9af466ULL, + 0xf23c3967a1e0b96eULL, 0xe58b08fa867a4d88ULL, 0xfb2fabc6a7341679ULL, 0x2a75381eb6026946ULL, + 0xc80a3be4c19420acULL, 0x66b1f6c681f2b6dcULL, 0x7cf7036761e93388ULL, 0x25abbbd8a660a4c4ULL, + 0x91ea12ba14fd5198ULL, 0x684950fc4a3cffa9ULL, 0xf826842130f5ad28ULL, 0x3ea988f75301a441ULL, + 0xc978109a695f8c6fULL, 0x1746eb4a0530c3f3ULL, 0x444d6d77b4459995ULL, 0x75952b8c054e5cc7ULL, + 0xa3703f7915f4d6aaULL, 0x66c346202f2647d8ULL, 0xd01469df811d644bULL, 0x77fea47d81a5d71fULL, + 0xc5e9529ef57ca381ULL, 0x6eeeb4b9ce2f881aULL, 0xb6e91a28e8009bd6ULL, 0x4b80be3e9afc3fecULL, + 0x7e3773c526aed2c5ULL, 0x1b4afcb453c9a49dULL, 0xa920bdd7baffb24dULL, 0x7c54699f122d400eULL, + 0xef46c8e14fa94bc8ULL, 0xe0b074ce2952ed5eULL, 0xbea450e1dbd885d5ULL, 0x61b68649320f712cULL, + 0x8a485f7309ccbdd1ULL, 0xbd06320d7d4d1a2dULL, 0x25232973322dbef4ULL, 0x445dc4758c17f770ULL, + 0xdb0434177cc8933cULL, 0xed6fe82175ea059fULL, 0x1efebefdc053db34ULL, 0x4adbe867c65daf99ULL, + 0x3acd71a2a90609dfULL, 0xe5e991856dd04050ULL, 0x1ec69b688157c23cULL, 0x697427f6885cfe4dULL, + 0xd7be7b9b65e1a851ULL, 0xa03d28d522c536ddULL, 0x28399d658fd2b645ULL, 0x49e5b7e17c2641e1ULL, + 0x6f8c3a98700457a4ULL, 0x5078f0a25ebb6778ULL, 0xd13c3ccbc382960fULL, 0x2e003258a7df84b1ULL, + 0x8ad1f39be6296a1cULL, 0xc1eeaa652a5fbfb2ULL, 0x33ee0673fd26f3cbULL, 0x59256173a69d2cccULL, + 0x41ea07aa4e18fc41ULL, 0xd9fc19527c87a51eULL, 0xbdaacb805831ca6fULL, 0x445b652dc916694fULL, + 0xce92a3a7f2172315ULL, 0x1edc282de11b9964ULL, 0xa1823aafe04c314aULL, 0x790a2d94437cf586ULL, + 0x71c447fb93f6e009ULL, 0x8922a56722845276ULL, 0xbf70903b204f5169ULL, 0x2f7a89891ba319feULL, + 0x02a08eb577e2140cULL, 0xed9a4ed4427bdcf4ULL, 0x5253ec44e4323cd1ULL, 0x3e88363c14e9355bULL, + 0xaa66c14277110b8cULL, 0x1ae0391610a23390ULL, 0x2030bd12c93fc2a2ULL, 0x3ee141579555c7abULL, + 0x9214de3a6d6e7d41ULL, 0x3ccdd88607f17efeULL, 0x674f1288f8e11217ULL, 0x5682250f329f93d0ULL, + 0x6cf00b136d2e396eULL, 0x6e4cf86f1014debfULL, 0x5930b1b5bfcc4e83ULL, 0x047069b48aba16b6ULL, + 0x0d4ce4ab69b20793ULL, 0xb24db91a97d0fb9eULL, 0xcdfa50f54e00d01dULL, 0x221b1085368bddb5ULL, + 0xe7e59468b1e3d8d2ULL, 0x53c56563bd122f93ULL, 0xeee8a903e0663f09ULL, 
0x61efa662cbbe3d42ULL, + 0x2cf8ddddde6eab2aULL, 0x9bf80ad51435f231ULL, 0x5deadacec9f04973ULL, 0x29275b5d41d29b27ULL, + 0xcfde0f0895ebf14fULL, 0xb9aab96b054905a7ULL, 0xcae80dd9a1c420fdULL, 0x0a63bf2f1673bbc7ULL, + 0x092f6e11958fbc8cULL, 0x672a81e804822fadULL, 0xcac8351560d52517ULL, 0x6f3f7722c8f192f8ULL, + 0xf8ba90ccc2e894b7ULL, 0x2c7557a438ff9f0dULL, 0x894d1d855ae52359ULL, 0x68e122157b743d69ULL, + 0xd87e5570cfb919f3ULL, 0x3f2cdecd95798db9ULL, 0x2121154710c0a2ceULL, 0x3c66a115246dc5b2ULL, + 0xcbedc562294ecb72ULL, 0xba7143c36a280b16ULL, 0x9610c2efd4078b67ULL, 0x6144735d946a4b1eULL, + 0x536f111ed75b3350ULL, 0x0211db8c2041d81bULL, 0xf93cb1000e10413cULL, 0x149dfd3c039e8876ULL, + 0xd479dde46b63155bULL, 0xb66e15e93c837976ULL, 0xdafde43b1f13e038ULL, 0x5fafda1a2e4b0b35ULL, + 0x3600bbdf17197581ULL, 0x3972050bbe3cd2c2ULL, 0x5938906dbdd5be86ULL, 0x34fce5e43f9b860fULL, + 0x75a8a4cd42d14d02ULL, 0x828dabc53441df65ULL, 0x33dcabedd2e131d3ULL, 0x3ebad76fb814d25fULL, + 0xd4906f566f70e10fULL, 0x5d12f7aa51690f5aULL, 0x45adb16e76cefcf2ULL, 0x01f768aead232999ULL, + 0x2b6cc77b6248febdULL, 0x3cd30628ec3aaffdULL, 0xce1c0b80d4ef486aULL, 0x4c3bff2ea6f66c23ULL, + 0x3f2ec4094aeaeb5fULL, 0x61b19b286e372ca7ULL, 0x5eefa966de2a701dULL, 0x23b20565de55e3efULL, + 0xe301ca5279d58557ULL, 0x07b2d4ce27c2874fULL, 0xa532cd8a9dcf1d67ULL, 0x2a52fee23f2bff56ULL, + 0x8624efb37cd8663dULL, 0xbbc7ac20ffbd7594ULL, 0x57b85e9c82d37445ULL, 0x7b3052cb86a6ec66ULL, + 0x3482f0ad2525e91eULL, 0x2cb68043d28edca0ULL, 0xaf4f6d052e1b003aULL, 0x185f8c2529781b0aULL, + 0xaa41de5bd80ce0d6ULL, 0x9407b2416853e9d6ULL, 0x563ec36e357f4c3aULL, 0x4cc4b8dd0e297bceULL, + 0xa2fc1a52ffb8730eULL, 0x1811f16e67058e37ULL, 0x10f9a366cddf4ee1ULL, 0x72f4a0c4a0b9f099ULL, + 0x8c16c06f663f4ea7ULL, 0x693b3af74e970fbaULL, 0x2102e7f1d69ec345ULL, 0x0ba53cbc968a8089ULL, + 0xca3d9dc7fea15537ULL, 0x4c6824bb51536493ULL, 0xb9886314844006b1ULL, 0x40d2a72ab454cc60ULL, + 0x5936a1b712570975ULL, 0x91b9d648debda657ULL, 0x3344094bb64330eaULL, 0x006ba10d12ee51d0ULL, + 0x19228468f5de5d58ULL, 0x0eb12f4c38cc05b0ULL, 0xa1039f9dd5601990ULL, 0x4502d4ce4fff0e0bULL, + 0xeb2054106837c189ULL, 0xd0f6544c6dd3b93cULL, 0x40727064c416d74fULL, 0x6e15c6114b502ef0ULL, + 0x4df2a398cfb1a76bULL, 0x11256c7419f2f6b1ULL, 0x4a497962066e6043ULL, 0x705b3aab41355b44ULL, + 0x365ef536d797b1d8ULL, 0x00076bd622ddf0dbULL, 0x3bbf33b0e0575a88ULL, 0x3777aa05c8e4ca4dULL, + 0x392745c85578db5fULL, 0x6fda4149dbae5ae2ULL, 0xb1f0b00b8adc9867ULL, 0x09963437d36f1da3ULL, + 0x7e824e90a5dc3853ULL, 0xccb5f6641f135cbdULL, 0x6736d86c87ce8fccULL, 0x625f3ce26604249fULL, + 0xaf8ac8059502f63fULL, 0x0c05e70a2e351469ULL, 0x35292e9c764b6305ULL, 0x1a394360c7e23ac3ULL, + 0xd5c6d53251183264ULL, 0x62065abd43c2b74fULL, 0xb5fbf5d03b973f9bULL, 0x13a3da3661206e5eULL, + 0xc6bd5837725d94e5ULL, 0x18e30912205016c5ULL, 0x2088ce1570033c68ULL, 0x7fba1f495c837987ULL, + 0x5a8c7423f2f9079dULL, 0x1735157b34023fc5ULL, 0xe4f9b49ad2fab351ULL, 0x6691ff72c878e33cULL, + 0x122c2adedc5eff3eULL, 0xf8dd4bf1d8956cf4ULL, 0xeb86205d9e9e5bdaULL, 0x049b92b9d975c743ULL, + 0xa5379730b0f6c05aULL, 0x72a0ffacc6f3a553ULL, 0xb0032c34b20dcd6dULL, 0x470e9dbc88d5164aULL, + 0xb19cf10ca237c047ULL, 0xb65466711f6c81a2ULL, 0xb3321bd16dd80b43ULL, 0x48c14f600c5fbe8eULL, + 0x66451c264aa6c803ULL, 0xb66e3904a4fa7da6ULL, 0xd45f19b0b3128395ULL, 0x31602627c3c9bc10ULL, + 0x3120dc4832e4e10dULL, 0xeb20c46756c717f7ULL, 0x00f52e3f67280294ULL, 0x566d4fc14730c509ULL, + 0x7e3a5d40fd837206ULL, 0xc1e926dc7159547aULL, 0x216730fba68d6095ULL, 0x22e8c3843f69cea7ULL, + 0x33d074e8930e4b2bULL, 0xb6e4350e84d15816ULL, 
0x5534c26ad6ba2365ULL, 0x7773c12f89f1f3f3ULL, + 0x8cba404da57962aaULL, 0x5b9897a81999ce56ULL, 0x508e862f121692fcULL, 0x3a81907fa093c291ULL, + 0x0dded0ff4725a510ULL, 0x10d8cc10673fc503ULL, 0x5b9d151c9f1f4e89ULL, 0x32a5c1d5cb09a44cULL, + 0x1e0aa442b90541fbULL, 0x5f85eb7cc1b485dbULL, 0xbee595ce8a9df2e5ULL, 0x25e496c722422236ULL, + 0x5edf3c46cd0fe5b9ULL, 0x34e75a7ed2a43388ULL, 0xe488de11d761e352ULL, 0x0e878a01a085545cULL, + 0xba493c77e021bb04ULL, 0x2b4d1843c7df899aULL, 0x9ea37a487ae80d67ULL, 0x67a9958011e41794ULL, + 0x4b58051a6697b065ULL, 0x47e33f7d8d6ba6d4ULL, 0xbb4da8d483ca46c1ULL, 0x68becaa181c2db0dULL, + 0x8d8980e90b989aa5ULL, 0xf95eb14a2c93c99bULL, 0x51c6c7c4796e73a2ULL, 0x6e228363b5efb569ULL, + 0xc6bbc0b02dd624c8ULL, 0x777eb47dec8170eeULL, 0x3cde15a004cfafa9ULL, 0x1dc6bc087160bf9bULL, + 0x2e07e043eec34002ULL, 0x18e9fc677a68dc7fULL, 0xd8da03188bd15b9aULL, 0x48fbc3bb00568253ULL, + 0x57547d4cfb654ce1ULL, 0xd3565b82a058e2adULL, 0xf63eaf0bbf154478ULL, 0x47531ef114dfbb18ULL, + 0xe1ec630a4278c587ULL, 0x5507d546ca8e83f3ULL, 0x85e135c63adc0c2bULL, 0x0aa7efa85682844eULL, + 0x72691ba8b3e1f615ULL, 0x32b4e9701fbe3ffaULL, 0x97b6d92e39bb7868ULL, 0x2cfe53dea02e39e8ULL, + 0x687392cd85cd52b0ULL, 0x27ff66c910e29831ULL, 0x97134556a9832d06ULL, 0x269bb0360a84f8a0ULL, + 0x706e55457643f85cULL, 0x3734a48c9b597d1bULL, 0x7aee91e8c6efa472ULL, 0x5cd6abc198a9d9e0ULL, + 0x0e04de06cb3ce41aULL, 0xd8c6eb893402e138ULL, 0x904659bb686e3772ULL, 0x7215c371746ba8c8ULL, + 0xfd12a97eeae4a2d9ULL, 0x9514b7516394f2c5ULL, 0x266fd5809208f294ULL, 0x5c847085619a26b9ULL, + 0x52985410fed694eaULL, 0x3c905b934a2ed254ULL, 0x10bb47692d3be467ULL, 0x063b3d2d69e5e9e1ULL, + 0x472726eedda57debULL, 0xefb6c4ae10f41891ULL, 0x2b1641917b307614ULL, 0x117c554fc4f45b7cULL, + 0xc07cf3118f9d8812ULL, 0x01dbd82050017939ULL, 0xd7e803f4171b2827ULL, 0x1015e87487d225eaULL, + 0xc58de3fed23acc4dULL, 0x50db91c294a7be2dULL, 0x0b94d43d1c9cf457ULL, 0x6b1640fa6e37524aULL, + 0x692f346c5fda0d09ULL, 0x200b1c59fa4d3151ULL, 0xb8c46f760777a296ULL, 0x4b38395f3ffdfbcfULL, + 0x18d25e00be54d671ULL, 0x60d50582bec8aba6ULL, 0x87ad8f263b78b982ULL, 0x50fdf64e9cda0432ULL, + 0x90f567aac578dcf0ULL, 0xef1e9b0ef2a3133bULL, 0x0eebba9242d9de71ULL, 0x15473c9bf03101c7ULL, + 0x7c77e8ae56b78095ULL, 0xb678e7666e6f078eULL, 0x2da0b9615348ba1fULL, 0x7cf931c1ff733f0bULL, + 0x26b357f50a0a366cULL, 0xe9708cf42b87d732ULL, 0xc13aeea5f91cb2c0ULL, 0x35d90c991143bb4cULL, + 0x47c1c404a9a0d9dcULL, 0x659e58451972d251ULL, 0x3875a8c473b38c31ULL, 0x1fbd9ed379561f24ULL, + 0x11fabc6fd41ec28dULL, 0x7ef8dfe3cd2a2dcaULL, 0x72e73b5d8c404595ULL, 0x6135fa4954b72f27ULL, + 0xccfc32a2de24b69cULL, 0x3f55698c1f095d88ULL, 0xbe3350ed5ac3f929ULL, 0x5e9bf806ca477eebULL, + 0xe9ce8fb63c309f68ULL, 0x5376f63565e1f9f4ULL, 0xd1afcfb35a6393f1ULL, 0x6632a1ede5623506ULL, + 0x0b7d6c390c2ded4cULL, 0x56cb3281df04cb1fULL, 0x66305a1249ecc3c7ULL, 0x5d588b60a38ca72aULL, + 0xa6ecbf78e8e5f42dULL, 0x86eeb44b3c8a3eecULL, 0xec219c48fbd21604ULL, 0x1aaf1af517c36731ULL, + 0xc306a2836769bde7ULL, 0x208280622b1e2adbULL, 0x8027f51ffbff94a6ULL, 0x76cfa1ce1124f26bULL, + 0x18eb00562422abb6ULL, 0xf377c4d58f8c29c3ULL, 0x4dbbc207f531561aULL, 0x0253b7f082128a27ULL, + 0x3d1f091cb62c17e0ULL, 0x4860e1abd64628a9ULL, 0x52d17436309d4253ULL, 0x356f97e13efae576ULL, + 0xd351e11aa150535bULL, 0x3e6b45bb1dd878ccULL, 0x0c776128bed92c98ULL, 0x1d34ae93032885b8ULL, + 0x4ba0488ca85ba4c3ULL, 0x985348c33c9ce6ceULL, 0x66124c6f97bda770ULL, 0x0f81a0290654124aULL, + 0x9ed09ca6569b86fdULL, 0x811009fd18af9a2dULL, 0xff08d03f93d8c20aULL, 0x52a148199faef26bULL, + 0x3e03f9dc2d8d1b73ULL, 
0x4205801873961a70ULL, 0xc0d987f041a35970ULL, 0x07aa1f15a1c0d549ULL, + 0xdfd46ce08cd27224ULL, 0x6d0a024f934e4239ULL, 0x808a7a6399897b59ULL, 0x0a4556e9e13d95a2ULL, + 0xd21a991fe9c13045ULL, 0x9b0e8548fe7751b8ULL, 0x5da643cb4bf30035ULL, 0x77db28d63940f721ULL, + 0xfc5eeb614adc9011ULL, 0x5229419ae8c411ebULL, 0x9ec3e7787d1dcf74ULL, 0x340d053e216e4cb5ULL, + 0xcac7af39b48df2b4ULL, 0xc0faec2871a10a94ULL, 0x140a69245ca575edULL, 0x0cf1c37134273a4cULL, + 0xc8ee306ac224b8a5ULL, 0x57eaee7ccb4930b0ULL, 0xa1e806bdaacbe74fULL, 0x7d9a62742eeb657dULL, + 0x9eb6b6ef546c4830ULL, 0x885cca1fddb36e2eULL, 0xe6b9f383ef0d7105ULL, 0x58654fef9d2e0412ULL, + 0xa905c4ffbe0e8e26ULL, 0x942de5df9b31816eULL, 0x497d723f802e88e1ULL, 0x30684dea602f408dULL, + 0x21e5a278a3e6cb34ULL, 0xaefb6e6f5b151dc4ULL, 0xb30b8e049d77ca15ULL, 0x28c3c9cf53b98981ULL, + 0x287fb721556cdd2aULL, 0x0d317ca897022274ULL, 0x7468c7423a543258ULL, 0x4a7f11464eb5642fULL, + 0xa237a4774d193aa6ULL, 0xd865986ea92129a1ULL, 0x24c515ecf87c1a88ULL, 0x604003575f39f5ebULL, + 0x47b9f189570a9b27ULL, 0x2b98cede465e4b78ULL, 0x026df551dbb85c20ULL, 0x74fcd91047e21901ULL, + 0x13e2a90a23c1bfa3ULL, 0x0cb0074e478519f6ULL, 0x5ff1cbbe3af6cf44ULL, 0x67fe5438be812dbeULL, + 0xd13cf64fa40f05b0ULL, 0x054dfb2f32283787ULL, 0x4173915b7f0d2aeaULL, 0x482f144f1f610d4eULL, + 0xf6210201b47f8234ULL, 0x5d0ae1929e70b990ULL, 0xdcd7f455b049567cULL, 0x7e93d0f1f0916f01ULL, + 0xdd79cbf18a7db4faULL, 0xbe8391bf6f74c62fULL, 0x027145d14b8291bdULL, 0x585a73ea2cbf1705ULL, + 0x485ca03e928a0db2ULL, 0x10fc01a5742857e7ULL, 0x2f482edbd6d551a7ULL, 0x0f0433b5048fdb8aULL, + 0x60da2e8dd7dc6247ULL, 0x88b4c9d38cd4819aULL, 0x13033ac001f66697ULL, 0x273b24fe3b367d75ULL, + 0xc6e8f66a31b3b9d4ULL, 0x281514a494df49d5ULL, 0xd1726fdfc8b23da7ULL, 0x4b3ae7d103dee548ULL, + 0xc6256e19ce4b9d7eULL, 0xff5c5cf186e3c61cULL, 0xacc63ca34b8ec145ULL, 0x74621888fee66574ULL, + 0x956f409645290a1eULL, 0xef0bf8e3263a962eULL, 0xed6a50eb5ec2647bULL, 0x0694283a9dca7502ULL, + 0x769b963643a2dcd1ULL, 0x42b7c8ea09fc5353ULL, 0x4f002aee13397eabULL, 0x63005e2c19b7d63aULL, + 0xca6736da63023beaULL, 0x966c7f6db12a99b7ULL, 0xace09390c537c5e1ULL, 0x0b696063a1aa89eeULL, + 0xebb03e97288c56e5ULL, 0x432a9f9f938c8be8ULL, 0xa6a5a93d5b717f71ULL, 0x1a5fb4c3e18f9d97ULL, + 0x1c94e7ad1c60cdceULL, 0xee202a43fc02c4a0ULL, 0x8dafe4d867c46a20ULL, 0x0a10263c8ac27b58ULL, + 0xd0dea9dfe4432a4aULL, 0x856af87bbe9277c5ULL, 0xce8472acc212c71aULL, 0x6f151b6d9bbb1e91ULL, + 0x26776c527ceed56aULL, 0x7d211cb7fbf8faecULL, 0x37ae66a6fd4609ccULL, 0x1f81b702d2770c42ULL, + 0x2fb0b057eac58392ULL, 0xe1dd89fe29744e9dULL, 0xc964f8eb17beb4f8ULL, 0x29571073c9a2d41eULL, + 0xa948a18981c0e254ULL, 0x2df6369b65b22830ULL, 0xa33eb2d75fcfd3c6ULL, 0x078cd6ec4199a01fULL, + 0x4a584a41ad900d2fULL, 0x32142b78e2c74c52ULL, 0x68c4e8338431c978ULL, 0x7f69ea9008689fc2ULL, + 0x52f2c81e46a38265ULL, 0xfd78072d04a832fdULL, 0x8cd7d5fa25359e94ULL, 0x4de71b7454cc29d2ULL, + 0x42eb60ad1eda6ac9ULL, 0x0aad37dfdbc09c3aULL, 0x81004b71e33cc191ULL, 0x44e6be345122803cULL, + 0x03fe8388ba1920dbULL, 0xf5d57c32150db008ULL, 0x49c8c4281af60c29ULL, 0x21edb518de701aeeULL, + 0x7fb63e418f06dc99ULL, 0xa4460d99c166d7b8ULL, 0x24dd5248ce520a83ULL, 0x5ec3ad712b928358ULL, + 0x15022a5fbd17930fULL, 0xa4f64a77d82570e3ULL, 0x12bc8d6915783712ULL, 0x498194c0fc620abbULL, + 0x38a2d9d255686c82ULL, 0x785c6bd9193e21f0ULL, 0xe4d5c81ab24a5484ULL, 0x56307860b2e20989ULL, + 0x429d55f78b4d74c4ULL, 0x22f1834643350131ULL, 0x1e60c24598c71fffULL, 0x59f2f014979983efULL, + 0x46a47d56eb494a44ULL, 0x3e22a854d636a18eULL, 0xb346e15274491c3bULL, 0x2ceafd4e5390cde7ULL, + 
0xba8a8538be0d6675ULL, 0x4b9074bb50818e23ULL, 0xcbdab89085d304c3ULL, 0x61a24fe0e56192c4ULL, + 0xcb7615e6db525bcbULL, 0xdd7d8c35a567e4caULL, 0xe6b4153acafcdd69ULL, 0x2d668e097f3c9766ULL, + 0xa57e7e265ce55ef0ULL, 0x5d9f4e527cd4b967ULL, 0xfbc83606492fd1e5ULL, 0x090d52beb7c3f7aeULL, + 0x09b9515a1e7b4d7cULL, 0x1f266a2599da44c0ULL, 0xa1c49548e2c55504ULL, 0x7ef04287126f15ccULL, + 0xfed1659dbd30ef15ULL, 0x8b4ab9eec4e0277bULL, 0x884d6236a5df3291ULL, 0x1fd96ea6bf5cf788ULL, + 0x42a161981f190d9aULL, 0x61d849507e6052c1ULL, 0x9fe113bf285a2cd5ULL, 0x7c22d676dbad85d8ULL, + 0x82e770ed2bfbd27dULL, 0x4c05b2ece996f5a5ULL, 0xcd40a9c2b0900150ULL, 0x5895319213d9bf64ULL, + 0xe7cc5d703fea2e08ULL, 0xb50c491258e2188cULL, 0xcce30baa48205bf0ULL, 0x537c659ccfa32d62ULL, + 0x37b6623a98cfc088ULL, 0xfe9bed1fa4d6aca4ULL, 0x04d29b8e56a8d1b0ULL, 0x725f71c40b519575ULL, + 0x28c7f89cd0339ce6ULL, 0x8367b14469ddc18bULL, 0x883ada83a6a1652cULL, 0x585f1974034d6c17ULL, + 0x89cfb266f1b19188ULL, 0xe63b4863e7c35217ULL, 0xd88c9da6b4c0526aULL, 0x3e035c9df0954635ULL, + 0xdd9d5412fb45de9dULL, 0xdd684532e4cff40dULL, 0x4b5c999b151d671cULL, 0x2d8c2cc811e7f690ULL, + 0x7f54be1d90055d40ULL, 0xa464c5df464aaf40ULL, 0x33979624f0e917beULL, 0x2c018dc527356b30ULL, + 0xa5415024e330b3d4ULL, 0x73ff3d96691652d3ULL, 0x94ec42c4ef9b59f1ULL, 0x0747201618d08e5aULL, + 0x4d6ca48aca411c53ULL, 0x66415f2fcfa66119ULL, 0x9c4dd40051e227ffULL, 0x59810bc09a02f7ebULL, + 0x2a7eb171b3dc101dULL, 0x441c5ab99ffef68eULL, 0x32025c9b93b359eaULL, 0x5e8ce0a71e9d112fULL, + 0xbfcccb92429503fdULL, 0xd271ba752f095d55ULL, 0x345ead5e972d091eULL, 0x18c8df11a83103baULL, + 0x90cd949a9aed0f4cULL, 0xc5d1f4cb6660e37eULL, 0xb8cac52d56c52e0bULL, 0x6e42e400c5808e0dULL, + 0xa3b46966eeaefd23ULL, 0x0c4f1f0be39ecdcaULL, 0x189dc8c9d683a51dULL, 0x51f27f054c09351bULL, + 0x4c487ccd2a320682ULL, 0x587ea95bb3df1c96ULL, 0xc8ccf79e555cb8e8ULL, 0x547dc829a206d73dULL, + 0xb822a6cd80c39b06ULL, 0xe96d54732000d4c6ULL, 0x28535b6f91463b4dULL, 0x228f4660e2486e1dULL, + 0x98799538de8d3abfULL, 0x8cd8330045ebca6eULL, 0x79952a008221e738ULL, 0x4322e1a7535cd2bbULL, + 0xb114c11819d1801cULL, 0x2016e4d84f3f5ec7ULL, 0xdd0e2df409260f4cULL, 0x5ec362c0ae5f7266ULL, + 0xc0462b18b8b2b4eeULL, 0x7cc8d950274d1afbULL, 0xf25f7105436b02d2ULL, 0x43bbf8dcbff9ccd3ULL, + 0xb6ad1767a039e9dfULL, 0xb0714da8f69d3583ULL, 0x5e55fa18b42931f5ULL, 0x4ed5558f33c60961ULL, + 0x1fe37901c647a5ddULL, 0x593ddf1f8081d357ULL, 0x0249a4fd813fd7a6ULL, 0x69acca274e9caf61ULL, + 0x047ba3ea330721c9ULL, 0x83423fc20e7e1ea0ULL, 0x1df4c0af01314a60ULL, 0x09a62dab89289527ULL, + 0xa5b325a49cc6cb00ULL, 0xe94b5dc654b56cb6ULL, 0x3be28779adc994a0ULL, 0x4296e8f8ba3a4aadULL, + 0x328689761e451eabULL, 0x2e4d598bff59594aULL, 0x49b96853d7a7084aULL, 0x4980a319601420a8ULL, + 0x9565b9e12f552c42ULL, 0x8a5318db7100fe96ULL, 0x05c90b4d43add0d7ULL, 0x538b4cd66a5d4edaULL, + 0xf4e94fc3e89f039fULL, 0x592c9af26f618045ULL, 0x08a36eb5fd4b9550ULL, 0x25fffaf6c2ed1419ULL, + 0x34434459cc79d354ULL, 0xeeecbfb4b1d5476bULL, 0xddeb34a061615d99ULL, 0x5129cecceb64b773ULL, + 0xee43215894993520ULL, 0x772f9c7cf14c0b3bULL, 0xd2e2fce306bedad5ULL, 0x715f42b546f06a97ULL, + 0x434ecdceda5b5f1aULL, 0x0da17115a49741a9ULL, 0x680bd77c73edad2eULL, 0x487c02354edd9041ULL, + 0xb8efeff3a70ed9c4ULL, 0x56a32aa3e857e302ULL, 0xdf3a68bd48a2a5a0ULL, 0x07f650b73176c444ULL, + 0xe38b9b1626e0ccb1ULL, 0x79e053c18b09fb36ULL, 0x56d90319c9f94964ULL, 0x1ca941e7ac9ff5c4ULL, + 0x49c4df29162fa0bbULL, 0x8488cf3282b33305ULL, 0x95dfda14cabb437dULL, 0x3391f78264d5ad86ULL, + 0x729ae06ae2b5095dULL, 0xd58a58d73259a946ULL, 0xe9834262d13921edULL, 
0x27fedafaa54bb592ULL, + 0xa99dc5b829ad48bbULL, 0x5f025742499ee260ULL, 0x802c8ecd5d7513fdULL, 0x78ceb3ef3f6dd938ULL, + 0xc342f44f8a135d94ULL, 0x7b9edb44828cdda3ULL, 0x9436d11a0537cfe7ULL, 0x5064b164ec1ab4c8ULL, + 0x7020eccfd37eb2fcULL, 0x1f31ea3ed90d25fcULL, 0x1b930d7bdfa1bb34ULL, 0x5344467a48113044ULL, + 0x70073170f25e6dfbULL, 0xe385dc1a50114cc8ULL, 0x2348698ac8fc4f00ULL, 0x2a77a55284dd40d8ULL, + 0xfe06afe0c98c6ce4ULL, 0xc235df96dddfd6e4ULL, 0x1428d01e33bf1ed3ULL, 0x785768ec9300bdafULL, + 0x9702e57a91deb63bULL, 0x61bdb8bfe5ce8b80ULL, 0x645b426f3d1d58acULL, 0x4804a82227a557bcULL, + 0x8e57048ab44d2601ULL, 0x68d6501a4b3a6935ULL, 0xc39c9ec3f9e1c293ULL, 0x4172f257d4de63e2ULL, + 0xd368b450330c6401ULL, 0x040d3017418f2391ULL, 0x2c34bb6090b7d90dULL, 0x16f649228fdfd51fULL, + 0xbea6818e2b928ef5ULL, 0xe28ccf91cdc11e72ULL, 0x594aaa68e77a36cdULL, 0x313034806c7ffd0fULL, + 0x8a9d27ac2249bd65ULL, 0x19a3b464018e9512ULL, 0xc26ccff352b37ec7ULL, 0x056f68341d797b21ULL, + 0x5e79d6757efd2327ULL, 0xfabdbcb6553afe15ULL, 0xd3e7222c6eaf5a60ULL, 0x7046c76d4dae743bULL, + 0x660be872b18d4a55ULL, 0x19992518574e1496ULL, 0xc103053a302bdcbbULL, 0x3ed8e9800b218e8eULL, + 0x7b0b9239fa75e03eULL, 0xefe9fb684633c083ULL, 0x98a35fbe391a7793ULL, 0x6065510fe2d0fe34ULL, + 0x55cb668548abad0cULL, 0xb4584548da87e527ULL, 0x2c43ecea0107c1ddULL, 0x526028809372de35ULL, + 0x3415c56af9213b1fULL, 0x5bee1a4d017e98dbULL, 0x13f6b105b5cf709bULL, 0x5ff20e3482b29ab6ULL, + 0x0aa29c75cc2e6c90ULL, 0xfc7d73ca3a70e206ULL, 0x899fc38fc4b5c515ULL, 0x250386b124ffc207ULL, + 0x54ea28d5ae3d2b56ULL, 0x9913149dd6de60ceULL, 0x16694fc58f06d6c1ULL, 0x46b23975eb018fc7ULL, + 0x470a6a0fb4b7b4e2ULL, 0x5d92475a8f7253deULL, 0xabeee5b52fbd3adbULL, 0x7fa20801a0806968ULL, + 0x76f3faf19f7714d2ULL, 0xb3e840c12f4660c3ULL, 0x0fb4cd8df212744eULL, 0x4b065a251d3a2dd2ULL, + 0x5cebde383d77cd4aULL, 0x6adf39df882c9cb1ULL, 0xa2dd242eb09af759ULL, 0x3147c0e50e5f6422ULL, + 0x164ca5101d1350dbULL, 0xf8d13479c33fc962ULL, 0xe640ce4d13e5da08ULL, 0x4bdee0c45061f8baULL, + 0xd7c46dc1a4edb1c9ULL, 0x5514d7b6437fd98aULL, 0x58942f6bb2a1c00bULL, 0x2dffb2ab1d70710eULL, + 0xccdfcf2fc18b6d68ULL, 0xa8ebcba8b7806167ULL, 0x980697f95e2937e3ULL, 0x02fbba1cd0126e8cULL }; -/* c is two 512-bit products: c0[0:7]=a0[0:3]*b0[0:3] and c1[8:15]=a1[4:7]*b1[4:7] - * a is two 256-bit integers: a0[0:3] and a1[4:7] - * b is two 256-bit integers: b0[0:3] and b1[4:7] - */ -static void mul2_256x256_integer_adx(u64 *const c, const u64 *const a, - const u64 *const b) +static void curve25519_ever64_base(u8 *out, const u8 *priv) { - asm volatile( - "xorl %%r14d, %%r14d ;" - "movq (%1), %%rdx; " /* A[0] */ - "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ - "xorl %%r10d, %%r10d ;" - "movq %%r8, (%0) ;" - "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ - "adox %%r10, %%r15 ;" - "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ - "adox %%r8, %%rax ;" - "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ - "adox %%r10, %%rbx ;" - /******************************************/ - "adox %%r14, %%rcx ;" - - "movq 8(%1), %%rdx; " /* A[1] */ - "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ - "adox %%r15, %%r8 ;" - "movq %%r8, 8(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ - "adox %%r10, %%r9 ;" - "adcx %%r9, %%rax ;" - "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ - "adox %%r8, %%r11 ;" - "adcx %%r11, %%rbx ;" - "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ - "adox %%r10, %%r13 ;" - "adcx %%r13, %%rcx ;" - /******************************************/ - "adox %%r14, %%r15 ;" - "adcx %%r14, %%r15 ;" - - "movq 16(%1), %%rdx; " /* A[2] */ - "xorl %%r10d, %%r10d 
;" - "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ - "adox %%rax, %%r8 ;" - "movq %%r8, 16(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ - "adox %%r10, %%r9 ;" - "adcx %%r9, %%rbx ;" - "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ - "adox %%r8, %%r11 ;" - "adcx %%r11, %%rcx ;" - "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ - "adox %%r10, %%r13 ;" - "adcx %%r13, %%r15 ;" - /******************************************/ - "adox %%r14, %%rax ;" - "adcx %%r14, %%rax ;" - - "movq 24(%1), %%rdx; " /* A[3] */ - "xorl %%r10d, %%r10d ;" - "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ - "adox %%rbx, %%r8 ;" - "movq %%r8, 24(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ - "adox %%r10, %%r9 ;" - "adcx %%r9, %%rcx ;" - "movq %%rcx, 32(%0) ;" - "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ - "adox %%r8, %%r11 ;" - "adcx %%r11, %%r15 ;" - "movq %%r15, 40(%0) ;" - "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ - "adox %%r10, %%r13 ;" - "adcx %%r13, %%rax ;" - "movq %%rax, 48(%0) ;" - /******************************************/ - "adox %%r14, %%rbx ;" - "adcx %%r14, %%rbx ;" - "movq %%rbx, 56(%0) ;" - - "movq 32(%1), %%rdx; " /* C[0] */ - "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */ - "xorl %%r10d, %%r10d ;" - "movq %%r8, 64(%0);" - "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */ - "adox %%r10, %%r15 ;" - "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */ - "adox %%r8, %%rax ;" - "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */ - "adox %%r10, %%rbx ;" - /******************************************/ - "adox %%r14, %%rcx ;" - - "movq 40(%1), %%rdx; " /* C[1] */ - "xorl %%r10d, %%r10d ;" - "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */ - "adox %%r15, %%r8 ;" - "movq %%r8, 72(%0);" - "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */ - "adox %%r10, %%r9 ;" - "adcx %%r9, %%rax ;" - "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */ - "adox %%r8, %%r11 ;" - "adcx %%r11, %%rbx ;" - "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */ - "adox %%r10, %%r13 ;" - "adcx %%r13, %%rcx ;" - /******************************************/ - "adox %%r14, %%r15 ;" - "adcx %%r14, %%r15 ;" - - "movq 48(%1), %%rdx; " /* C[2] */ - "xorl %%r10d, %%r10d ;" - "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */ - "adox %%rax, %%r8 ;" - "movq %%r8, 80(%0);" - "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */ - "adox %%r10, %%r9 ;" - "adcx %%r9, %%rbx ;" - "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */ - "adox %%r8, %%r11 ;" - "adcx %%r11, %%rcx ;" - "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */ - "adox %%r10, %%r13 ;" - "adcx %%r13, %%r15 ;" - /******************************************/ - "adox %%r14, %%rax ;" - "adcx %%r14, %%rax ;" - - "movq 56(%1), %%rdx; " /* C[3] */ - "xorl %%r10d, %%r10d ;" - "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */ - "adox %%rbx, %%r8 ;" - "movq %%r8, 88(%0);" - "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */ - "adox %%r10, %%r9 ;" - "adcx %%r9, %%rcx ;" - "movq %%rcx, 96(%0) ;" - "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */ - "adox %%r8, %%r11 ;" - "adcx %%r11, %%r15 ;" - "movq %%r15, 104(%0) ;" - "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */ - "adox %%r10, %%r13 ;" - "adcx %%r13, %%rax ;" - "movq %%rax, 112(%0) ;" - /******************************************/ - "adox %%r14, %%rbx ;" - "adcx %%r14, %%rbx ;" - "movq %%rbx, 120(%0) ;" - : - : "r"(c), "r"(a), "r"(b) - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", - "%r10", "%r11", "%r13", "%r14", "%r15"); -} - -static void mul2_256x256_integer_bmi2(u64 *const c, const u64 *const a, - const u64 *const b) -{ - asm volatile( - "movq (%1), %%rdx; " /* A[0] */ - "mulx (%2), %%r8, %%r15; " 
/* A[0]*B[0] */ - "movq %%r8, (%0) ;" - "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ - "addq %%r10, %%r15 ;" - "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ - "adcq %%r8, %%rax ;" - "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ - "adcq %%r10, %%rbx ;" - /******************************************/ - "adcq $0, %%rcx ;" - - "movq 8(%1), %%rdx; " /* A[1] */ - "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ - "addq %%r15, %%r8 ;" - "movq %%r8, 8(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ - "adcq %%r10, %%r9 ;" - "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ - "adcq %%r8, %%r11 ;" - "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ - "adcq %%r10, %%r13 ;" - /******************************************/ - "adcq $0, %%r15 ;" - - "addq %%r9, %%rax ;" - "adcq %%r11, %%rbx ;" - "adcq %%r13, %%rcx ;" - "adcq $0, %%r15 ;" - - "movq 16(%1), %%rdx; " /* A[2] */ - "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ - "addq %%rax, %%r8 ;" - "movq %%r8, 16(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ - "adcq %%r10, %%r9 ;" - "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ - "adcq %%r8, %%r11 ;" - "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ - "adcq %%r10, %%r13 ;" - /******************************************/ - "adcq $0, %%rax ;" - - "addq %%r9, %%rbx ;" - "adcq %%r11, %%rcx ;" - "adcq %%r13, %%r15 ;" - "adcq $0, %%rax ;" - - "movq 24(%1), %%rdx; " /* A[3] */ - "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ - "addq %%rbx, %%r8 ;" - "movq %%r8, 24(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ - "adcq %%r10, %%r9 ;" - "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ - "adcq %%r8, %%r11 ;" - "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ - "adcq %%r10, %%r13 ;" - /******************************************/ - "adcq $0, %%rbx ;" - - "addq %%r9, %%rcx ;" - "movq %%rcx, 32(%0) ;" - "adcq %%r11, %%r15 ;" - "movq %%r15, 40(%0) ;" - "adcq %%r13, %%rax ;" - "movq %%rax, 48(%0) ;" - "adcq $0, %%rbx ;" - "movq %%rbx, 56(%0) ;" - - "movq 32(%1), %%rdx; " /* C[0] */ - "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */ - "movq %%r8, 64(%0) ;" - "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */ - "addq %%r10, %%r15 ;" - "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */ - "adcq %%r8, %%rax ;" - "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */ - "adcq %%r10, %%rbx ;" - /******************************************/ - "adcq $0, %%rcx ;" - - "movq 40(%1), %%rdx; " /* C[1] */ - "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */ - "addq %%r15, %%r8 ;" - "movq %%r8, 72(%0) ;" - "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */ - "adcq %%r10, %%r9 ;" - "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */ - "adcq %%r8, %%r11 ;" - "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */ - "adcq %%r10, %%r13 ;" - /******************************************/ - "adcq $0, %%r15 ;" - - "addq %%r9, %%rax ;" - "adcq %%r11, %%rbx ;" - "adcq %%r13, %%rcx ;" - "adcq $0, %%r15 ;" - - "movq 48(%1), %%rdx; " /* C[2] */ - "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */ - "addq %%rax, %%r8 ;" - "movq %%r8, 80(%0) ;" - "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */ - "adcq %%r10, %%r9 ;" - "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */ - "adcq %%r8, %%r11 ;" - "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */ - "adcq %%r10, %%r13 ;" - /******************************************/ - "adcq $0, %%rax ;" - - "addq %%r9, %%rbx ;" - "adcq %%r11, %%rcx ;" - "adcq %%r13, %%r15 ;" - "adcq $0, %%rax ;" - - "movq 56(%1), %%rdx; " /* C[3] */ - "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */ - "addq %%rbx, %%r8 ;" - "movq %%r8, 88(%0) ;" - "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */ - "adcq %%r10, %%r9 ;" - "mulx 48(%2), %%r8, 
%%r13; " /* C[3]*D[2] */ - "adcq %%r8, %%r11 ;" - "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */ - "adcq %%r10, %%r13 ;" - /******************************************/ - "adcq $0, %%rbx ;" - - "addq %%r9, %%rcx ;" - "movq %%rcx, 96(%0) ;" - "adcq %%r11, %%r15 ;" - "movq %%r15, 104(%0) ;" - "adcq %%r13, %%rax ;" - "movq %%rax, 112(%0) ;" - "adcq $0, %%rbx ;" - "movq %%rbx, 120(%0) ;" - : - : "r"(c), "r"(a), "r"(b) - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", - "%r10", "%r11", "%r13", "%r15"); -} - -static void sqr2_256x256_integer_adx(u64 *const c, const u64 *const a) -{ - asm volatile( - "movq (%1), %%rdx ;" /* A[0] */ - "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */ - "xorl %%r15d, %%r15d;" - "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */ - "adcx %%r14, %%r9 ;" - "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */ - "adcx %%rax, %%r10 ;" - "movq 24(%1), %%rdx ;" /* A[3] */ - "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */ - "adcx %%rcx, %%r11 ;" - "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */ - "adcx %%rax, %%rbx ;" - "movq 8(%1), %%rdx ;" /* A[1] */ - "adcx %%r15, %%r13 ;" - "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */ - "movq $0, %%r14 ;" - /******************************************/ - "adcx %%r15, %%r14 ;" - - "xorl %%r15d, %%r15d;" - "adox %%rax, %%r10 ;" - "adcx %%r8, %%r8 ;" - "adox %%rcx, %%r11 ;" - "adcx %%r9, %%r9 ;" - "adox %%r15, %%rbx ;" - "adcx %%r10, %%r10 ;" - "adox %%r15, %%r13 ;" - "adcx %%r11, %%r11 ;" - "adox %%r15, %%r14 ;" - "adcx %%rbx, %%rbx ;" - "adcx %%r13, %%r13 ;" - "adcx %%r14, %%r14 ;" - - "movq (%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ - /*******************/ - "movq %%rax, 0(%0) ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, 8(%0) ;" - "movq 8(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ - "adcq %%rax, %%r9 ;" - "movq %%r9, 16(%0) ;" - "adcq %%rcx, %%r10 ;" - "movq %%r10, 24(%0) ;" - "movq 16(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ - "adcq %%rax, %%r11 ;" - "movq %%r11, 32(%0) ;" - "adcq %%rcx, %%rbx ;" - "movq %%rbx, 40(%0) ;" - "movq 24(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ - "adcq %%rax, %%r13 ;" - "movq %%r13, 48(%0) ;" - "adcq %%rcx, %%r14 ;" - "movq %%r14, 56(%0) ;" - - - "movq 32(%1), %%rdx ;" /* B[0] */ - "mulx 40(%1), %%r8, %%r14 ;" /* B[1]*B[0] */ - "xorl %%r15d, %%r15d;" - "mulx 48(%1), %%r9, %%r10 ;" /* B[2]*B[0] */ - "adcx %%r14, %%r9 ;" - "mulx 56(%1), %%rax, %%rcx ;" /* B[3]*B[0] */ - "adcx %%rax, %%r10 ;" - "movq 56(%1), %%rdx ;" /* B[3] */ - "mulx 40(%1), %%r11, %%rbx ;" /* B[1]*B[3] */ - "adcx %%rcx, %%r11 ;" - "mulx 48(%1), %%rax, %%r13 ;" /* B[2]*B[3] */ - "adcx %%rax, %%rbx ;" - "movq 40(%1), %%rdx ;" /* B[1] */ - "adcx %%r15, %%r13 ;" - "mulx 48(%1), %%rax, %%rcx ;" /* B[2]*B[1] */ - "movq $0, %%r14 ;" - /******************************************/ - "adcx %%r15, %%r14 ;" - - "xorl %%r15d, %%r15d;" - "adox %%rax, %%r10 ;" - "adcx %%r8, %%r8 ;" - "adox %%rcx, %%r11 ;" - "adcx %%r9, %%r9 ;" - "adox %%r15, %%rbx ;" - "adcx %%r10, %%r10 ;" - "adox %%r15, %%r13 ;" - "adcx %%r11, %%r11 ;" - "adox %%r15, %%r14 ;" - "adcx %%rbx, %%rbx ;" - "adcx %%r13, %%r13 ;" - "adcx %%r14, %%r14 ;" - - "movq 32(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* B[0]^2 */ - /*******************/ - "movq %%rax, 64(%0) ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, 72(%0) ;" - "movq 40(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* B[1]^2 */ - "adcq %%rax, %%r9 ;" - "movq %%r9, 80(%0) ;" - "adcq %%rcx, %%r10 ;" - "movq %%r10, 88(%0) ;" - "movq 48(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* B[2]^2 
*/ - "adcq %%rax, %%r11 ;" - "movq %%r11, 96(%0) ;" - "adcq %%rcx, %%rbx ;" - "movq %%rbx, 104(%0) ;" - "movq 56(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* B[3]^2 */ - "adcq %%rax, %%r13 ;" - "movq %%r13, 112(%0) ;" - "adcq %%rcx, %%r14 ;" - "movq %%r14, 120(%0) ;" - : - : "r"(c), "r"(a) - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", - "%r10", "%r11", "%r13", "%r14", "%r15"); -} - -static void sqr2_256x256_integer_bmi2(u64 *const c, const u64 *const a) -{ - asm volatile( - "movq 8(%1), %%rdx ;" /* A[1] */ - "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */ - "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */ - "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */ - - "movq 16(%1), %%rdx ;" /* A[2] */ - "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */ - "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */ - - "addq %%rax, %%r9 ;" - "adcq %%rdx, %%r10 ;" - "adcq %%rcx, %%r11 ;" - "adcq %%r14, %%r15 ;" - "adcq $0, %%r13 ;" - "movq $0, %%r14 ;" - "adcq $0, %%r14 ;" - - "movq (%1), %%rdx ;" /* A[0] */ - "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */ - - "addq %%rax, %%r10 ;" - "adcq %%rcx, %%r11 ;" - "adcq $0, %%r15 ;" - "adcq $0, %%r13 ;" - "adcq $0, %%r14 ;" - - "shldq $1, %%r13, %%r14 ;" - "shldq $1, %%r15, %%r13 ;" - "shldq $1, %%r11, %%r15 ;" - "shldq $1, %%r10, %%r11 ;" - "shldq $1, %%r9, %%r10 ;" - "shldq $1, %%r8, %%r9 ;" - "shlq $1, %%r8 ;" - - /*******************/ - "mulx %%rdx, %%rax, %%rcx ; " /* A[0]^2 */ - /*******************/ - "movq %%rax, 0(%0) ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, 8(%0) ;" - "movq 8(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ; " /* A[1]^2 */ - "adcq %%rax, %%r9 ;" - "movq %%r9, 16(%0) ;" - "adcq %%rcx, %%r10 ;" - "movq %%r10, 24(%0) ;" - "movq 16(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ; " /* A[2]^2 */ - "adcq %%rax, %%r11 ;" - "movq %%r11, 32(%0) ;" - "adcq %%rcx, %%r15 ;" - "movq %%r15, 40(%0) ;" - "movq 24(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ; " /* A[3]^2 */ - "adcq %%rax, %%r13 ;" - "movq %%r13, 48(%0) ;" - "adcq %%rcx, %%r14 ;" - "movq %%r14, 56(%0) ;" - - "movq 40(%1), %%rdx ;" /* B[1] */ - "mulx 32(%1), %%r8, %%r9 ;" /* B[0]*B[1] */ - "mulx 48(%1), %%r10, %%r11 ;" /* B[2]*B[1] */ - "mulx 56(%1), %%rcx, %%r14 ;" /* B[3]*B[1] */ - - "movq 48(%1), %%rdx ;" /* B[2] */ - "mulx 56(%1), %%r15, %%r13 ;" /* B[3]*B[2] */ - "mulx 32(%1), %%rax, %%rdx ;" /* B[0]*B[2] */ - - "addq %%rax, %%r9 ;" - "adcq %%rdx, %%r10 ;" - "adcq %%rcx, %%r11 ;" - "adcq %%r14, %%r15 ;" - "adcq $0, %%r13 ;" - "movq $0, %%r14 ;" - "adcq $0, %%r14 ;" - - "movq 32(%1), %%rdx ;" /* B[0] */ - "mulx 56(%1), %%rax, %%rcx ;" /* B[0]*B[3] */ - - "addq %%rax, %%r10 ;" - "adcq %%rcx, %%r11 ;" - "adcq $0, %%r15 ;" - "adcq $0, %%r13 ;" - "adcq $0, %%r14 ;" - - "shldq $1, %%r13, %%r14 ;" - "shldq $1, %%r15, %%r13 ;" - "shldq $1, %%r11, %%r15 ;" - "shldq $1, %%r10, %%r11 ;" - "shldq $1, %%r9, %%r10 ;" - "shldq $1, %%r8, %%r9 ;" - "shlq $1, %%r8 ;" - - /*******************/ - "mulx %%rdx, %%rax, %%rcx ; " /* B[0]^2 */ - /*******************/ - "movq %%rax, 64(%0) ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, 72(%0) ;" - "movq 40(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ; " /* B[1]^2 */ - "adcq %%rax, %%r9 ;" - "movq %%r9, 80(%0) ;" - "adcq %%rcx, %%r10 ;" - "movq %%r10, 88(%0) ;" - "movq 48(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ; " /* B[2]^2 */ - "adcq %%rax, %%r11 ;" - "movq %%r11, 96(%0) ;" - "adcq %%rcx, %%r15 ;" - "movq %%r15, 104(%0) ;" - "movq 56(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ; " /* B[3]^2 */ - "adcq %%rax, %%r13 ;" - "movq %%r13, 112(%0) ;" - "adcq %%rcx, %%r14 ;" - "movq %%r14, 
120(%0) ;" - : - : "r"(c), "r"(a) - : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", - "%r11", "%r13", "%r14", "%r15"); -} - -static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a) -{ - asm volatile( - "movl $38, %%edx; " /* 2*c = 38 = 2^256 */ - "mulx 32(%1), %%r8, %%r10; " /* c*C[4] */ - "xorl %%ebx, %%ebx ;" - "adox (%1), %%r8 ;" - "mulx 40(%1), %%r9, %%r11; " /* c*C[5] */ - "adcx %%r10, %%r9 ;" - "adox 8(%1), %%r9 ;" - "mulx 48(%1), %%r10, %%rax; " /* c*C[6] */ - "adcx %%r11, %%r10 ;" - "adox 16(%1), %%r10 ;" - "mulx 56(%1), %%r11, %%rcx; " /* c*C[7] */ - "adcx %%rax, %%r11 ;" - "adox 24(%1), %%r11 ;" - /***************************************/ - "adcx %%rbx, %%rcx ;" - "adox %%rbx, %%rcx ;" - "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ - "adcx %%rcx, %%r8 ;" - "adcx %%rbx, %%r9 ;" - "movq %%r9, 8(%0) ;" - "adcx %%rbx, %%r10 ;" - "movq %%r10, 16(%0) ;" - "adcx %%rbx, %%r11 ;" - "movq %%r11, 24(%0) ;" - "mov $0, %%ecx ;" - "cmovc %%edx, %%ecx ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, (%0) ;" - - "mulx 96(%1), %%r8, %%r10; " /* c*C[4] */ - "xorl %%ebx, %%ebx ;" - "adox 64(%1), %%r8 ;" - "mulx 104(%1), %%r9, %%r11; " /* c*C[5] */ - "adcx %%r10, %%r9 ;" - "adox 72(%1), %%r9 ;" - "mulx 112(%1), %%r10, %%rax; " /* c*C[6] */ - "adcx %%r11, %%r10 ;" - "adox 80(%1), %%r10 ;" - "mulx 120(%1), %%r11, %%rcx; " /* c*C[7] */ - "adcx %%rax, %%r11 ;" - "adox 88(%1), %%r11 ;" - /****************************************/ - "adcx %%rbx, %%rcx ;" - "adox %%rbx, %%rcx ;" - "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ - "adcx %%rcx, %%r8 ;" - "adcx %%rbx, %%r9 ;" - "movq %%r9, 40(%0) ;" - "adcx %%rbx, %%r10 ;" - "movq %%r10, 48(%0) ;" - "adcx %%rbx, %%r11 ;" - "movq %%r11, 56(%0) ;" - "mov $0, %%ecx ;" - "cmovc %%edx, %%ecx ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, 32(%0) ;" - : - : "r"(c), "r"(a) - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", - "%r10", "%r11"); -} - -static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a) -{ - asm volatile( - "movl $38, %%edx ; " /* 2*c = 38 = 2^256 */ - "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ - "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ - "addq %%r10, %%r9 ;" - "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ - "adcq %%r11, %%r10 ;" - "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ - "adcq %%rax, %%r11 ;" - /***************************************/ - "adcq $0, %%rcx ;" - "addq (%1), %%r8 ;" - "adcq 8(%1), %%r9 ;" - "adcq 16(%1), %%r10 ;" - "adcq 24(%1), %%r11 ;" - "adcq $0, %%rcx ;" - "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ - "addq %%rcx, %%r8 ;" - "adcq $0, %%r9 ;" - "movq %%r9, 8(%0) ;" - "adcq $0, %%r10 ;" - "movq %%r10, 16(%0) ;" - "adcq $0, %%r11 ;" - "movq %%r11, 24(%0) ;" - "mov $0, %%ecx ;" - "cmovc %%edx, %%ecx ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, (%0) ;" - - "mulx 96(%1), %%r8, %%r10 ;" /* c*C[4] */ - "mulx 104(%1), %%r9, %%r11 ;" /* c*C[5] */ - "addq %%r10, %%r9 ;" - "mulx 112(%1), %%r10, %%rax ;" /* c*C[6] */ - "adcq %%r11, %%r10 ;" - "mulx 120(%1), %%r11, %%rcx ;" /* c*C[7] */ - "adcq %%rax, %%r11 ;" - /****************************************/ - "adcq $0, %%rcx ;" - "addq 64(%1), %%r8 ;" - "adcq 72(%1), %%r9 ;" - "adcq 80(%1), %%r10 ;" - "adcq 88(%1), %%r11 ;" - "adcq $0, %%rcx ;" - "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ - "addq %%rcx, %%r8 ;" - "adcq $0, %%r9 ;" - "movq %%r9, 40(%0) ;" - "adcq $0, %%r10 ;" - "movq %%r10, 48(%0) ;" - "adcq $0, %%r11 ;" - "movq %%r11, 56(%0) ;" - "mov $0, %%ecx ;" - "cmovc %%edx, %%ecx ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, 32(%0) ;" - : - : "r"(c), "r"(a) - : 
"memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", - "%r11"); -} - -static void mul_256x256_integer_adx(u64 *const c, const u64 *const a, - const u64 *const b) -{ - asm volatile( - "movq (%1), %%rdx; " /* A[0] */ - "mulx (%2), %%r8, %%r9; " /* A[0]*B[0] */ - "xorl %%r10d, %%r10d ;" - "movq %%r8, (%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[0]*B[1] */ - "adox %%r9, %%r10 ;" - "movq %%r10, 8(%0) ;" - "mulx 16(%2), %%r15, %%r13; " /* A[0]*B[2] */ - "adox %%r11, %%r15 ;" - "mulx 24(%2), %%r14, %%rdx; " /* A[0]*B[3] */ - "adox %%r13, %%r14 ;" - "movq $0, %%rax ;" - /******************************************/ - "adox %%rdx, %%rax ;" - - "movq 8(%1), %%rdx; " /* A[1] */ - "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ - "xorl %%r10d, %%r10d ;" - "adcx 8(%0), %%r8 ;" - "movq %%r8, 8(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ - "adox %%r9, %%r10 ;" - "adcx %%r15, %%r10 ;" - "movq %%r10, 16(%0) ;" - "mulx 16(%2), %%r15, %%r13; " /* A[1]*B[2] */ - "adox %%r11, %%r15 ;" - "adcx %%r14, %%r15 ;" - "movq $0, %%r8 ;" - "mulx 24(%2), %%r14, %%rdx; " /* A[1]*B[3] */ - "adox %%r13, %%r14 ;" - "adcx %%rax, %%r14 ;" - "movq $0, %%rax ;" - /******************************************/ - "adox %%rdx, %%rax ;" - "adcx %%r8, %%rax ;" - - "movq 16(%1), %%rdx; " /* A[2] */ - "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ - "xorl %%r10d, %%r10d ;" - "adcx 16(%0), %%r8 ;" - "movq %%r8, 16(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ - "adox %%r9, %%r10 ;" - "adcx %%r15, %%r10 ;" - "movq %%r10, 24(%0) ;" - "mulx 16(%2), %%r15, %%r13; " /* A[2]*B[2] */ - "adox %%r11, %%r15 ;" - "adcx %%r14, %%r15 ;" - "movq $0, %%r8 ;" - "mulx 24(%2), %%r14, %%rdx; " /* A[2]*B[3] */ - "adox %%r13, %%r14 ;" - "adcx %%rax, %%r14 ;" - "movq $0, %%rax ;" - /******************************************/ - "adox %%rdx, %%rax ;" - "adcx %%r8, %%rax ;" - - "movq 24(%1), %%rdx; " /* A[3] */ - "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ - "xorl %%r10d, %%r10d ;" - "adcx 24(%0), %%r8 ;" - "movq %%r8, 24(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ - "adox %%r9, %%r10 ;" - "adcx %%r15, %%r10 ;" - "movq %%r10, 32(%0) ;" - "mulx 16(%2), %%r15, %%r13; " /* A[3]*B[2] */ - "adox %%r11, %%r15 ;" - "adcx %%r14, %%r15 ;" - "movq %%r15, 40(%0) ;" - "movq $0, %%r8 ;" - "mulx 24(%2), %%r14, %%rdx; " /* A[3]*B[3] */ - "adox %%r13, %%r14 ;" - "adcx %%rax, %%r14 ;" - "movq %%r14, 48(%0) ;" - "movq $0, %%rax ;" - /******************************************/ - "adox %%rdx, %%rax ;" - "adcx %%r8, %%rax ;" - "movq %%rax, 56(%0) ;" - : - : "r"(c), "r"(a), "r"(b) - : "memory", "cc", "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", - "%r13", "%r14", "%r15"); -} - -static void mul_256x256_integer_bmi2(u64 *const c, const u64 *const a, - const u64 *const b) -{ - asm volatile( - "movq (%1), %%rdx; " /* A[0] */ - "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */ - "movq %%r8, (%0) ;" - "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */ - "addq %%r10, %%r15 ;" - "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */ - "adcq %%r8, %%rax ;" - "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */ - "adcq %%r10, %%rbx ;" - /******************************************/ - "adcq $0, %%rcx ;" - - "movq 8(%1), %%rdx; " /* A[1] */ - "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */ - "addq %%r15, %%r8 ;" - "movq %%r8, 8(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */ - "adcq %%r10, %%r9 ;" - "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */ - "adcq %%r8, %%r11 ;" - "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */ - "adcq %%r10, %%r13 ;" - /******************************************/ - "adcq $0, %%r15 ;" 
- - "addq %%r9, %%rax ;" - "adcq %%r11, %%rbx ;" - "adcq %%r13, %%rcx ;" - "adcq $0, %%r15 ;" - - "movq 16(%1), %%rdx; " /* A[2] */ - "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */ - "addq %%rax, %%r8 ;" - "movq %%r8, 16(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */ - "adcq %%r10, %%r9 ;" - "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */ - "adcq %%r8, %%r11 ;" - "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */ - "adcq %%r10, %%r13 ;" - /******************************************/ - "adcq $0, %%rax ;" - - "addq %%r9, %%rbx ;" - "adcq %%r11, %%rcx ;" - "adcq %%r13, %%r15 ;" - "adcq $0, %%rax ;" - - "movq 24(%1), %%rdx; " /* A[3] */ - "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */ - "addq %%rbx, %%r8 ;" - "movq %%r8, 24(%0) ;" - "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */ - "adcq %%r10, %%r9 ;" - "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */ - "adcq %%r8, %%r11 ;" - "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */ - "adcq %%r10, %%r13 ;" - /******************************************/ - "adcq $0, %%rbx ;" - - "addq %%r9, %%rcx ;" - "movq %%rcx, 32(%0) ;" - "adcq %%r11, %%r15 ;" - "movq %%r15, 40(%0) ;" - "adcq %%r13, %%rax ;" - "movq %%rax, 48(%0) ;" - "adcq $0, %%rbx ;" - "movq %%rbx, 56(%0) ;" - : - : "r"(c), "r"(a), "r"(b) - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", - "%r10", "%r11", "%r13", "%r15"); -} - -static void sqr_256x256_integer_adx(u64 *const c, const u64 *const a) -{ - asm volatile( - "movq (%1), %%rdx ;" /* A[0] */ - "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */ - "xorl %%r15d, %%r15d;" - "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */ - "adcx %%r14, %%r9 ;" - "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */ - "adcx %%rax, %%r10 ;" - "movq 24(%1), %%rdx ;" /* A[3] */ - "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */ - "adcx %%rcx, %%r11 ;" - "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */ - "adcx %%rax, %%rbx ;" - "movq 8(%1), %%rdx ;" /* A[1] */ - "adcx %%r15, %%r13 ;" - "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */ - "movq $0, %%r14 ;" - /******************************************/ - "adcx %%r15, %%r14 ;" - - "xorl %%r15d, %%r15d;" - "adox %%rax, %%r10 ;" - "adcx %%r8, %%r8 ;" - "adox %%rcx, %%r11 ;" - "adcx %%r9, %%r9 ;" - "adox %%r15, %%rbx ;" - "adcx %%r10, %%r10 ;" - "adox %%r15, %%r13 ;" - "adcx %%r11, %%r11 ;" - "adox %%r15, %%r14 ;" - "adcx %%rbx, %%rbx ;" - "adcx %%r13, %%r13 ;" - "adcx %%r14, %%r14 ;" - - "movq (%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ - /*******************/ - "movq %%rax, 0(%0) ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, 8(%0) ;" - "movq 8(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ - "adcq %%rax, %%r9 ;" - "movq %%r9, 16(%0) ;" - "adcq %%rcx, %%r10 ;" - "movq %%r10, 24(%0) ;" - "movq 16(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ - "adcq %%rax, %%r11 ;" - "movq %%r11, 32(%0) ;" - "adcq %%rcx, %%rbx ;" - "movq %%rbx, 40(%0) ;" - "movq 24(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ - "adcq %%rax, %%r13 ;" - "movq %%r13, 48(%0) ;" - "adcq %%rcx, %%r14 ;" - "movq %%r14, 56(%0) ;" - : - : "r"(c), "r"(a) - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", - "%r10", "%r11", "%r13", "%r14", "%r15"); -} - -static void sqr_256x256_integer_bmi2(u64 *const c, const u64 *const a) -{ - asm volatile( - "movq 8(%1), %%rdx ;" /* A[1] */ - "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */ - "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */ - "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */ - - "movq 16(%1), %%rdx ;" /* A[2] */ - "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */ - "mulx (%1), %%rax, %%rdx ;" /* 
A[0]*A[2] */ - - "addq %%rax, %%r9 ;" - "adcq %%rdx, %%r10 ;" - "adcq %%rcx, %%r11 ;" - "adcq %%r14, %%r15 ;" - "adcq $0, %%r13 ;" - "movq $0, %%r14 ;" - "adcq $0, %%r14 ;" - - "movq (%1), %%rdx ;" /* A[0] */ - "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */ - - "addq %%rax, %%r10 ;" - "adcq %%rcx, %%r11 ;" - "adcq $0, %%r15 ;" - "adcq $0, %%r13 ;" - "adcq $0, %%r14 ;" - - "shldq $1, %%r13, %%r14 ;" - "shldq $1, %%r15, %%r13 ;" - "shldq $1, %%r11, %%r15 ;" - "shldq $1, %%r10, %%r11 ;" - "shldq $1, %%r9, %%r10 ;" - "shldq $1, %%r8, %%r9 ;" - "shlq $1, %%r8 ;" - - /*******************/ - "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */ - /*******************/ - "movq %%rax, 0(%0) ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, 8(%0) ;" - "movq 8(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */ - "adcq %%rax, %%r9 ;" - "movq %%r9, 16(%0) ;" - "adcq %%rcx, %%r10 ;" - "movq %%r10, 24(%0) ;" - "movq 16(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */ - "adcq %%rax, %%r11 ;" - "movq %%r11, 32(%0) ;" - "adcq %%rcx, %%r15 ;" - "movq %%r15, 40(%0) ;" - "movq 24(%1), %%rdx ;" - "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */ - "adcq %%rax, %%r13 ;" - "movq %%r13, 48(%0) ;" - "adcq %%rcx, %%r14 ;" - "movq %%r14, 56(%0) ;" - : - : "r"(c), "r"(a) - : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", - "%r11", "%r13", "%r14", "%r15"); -} - -static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a) -{ - asm volatile( - "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */ - "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ - "xorl %%ebx, %%ebx ;" - "adox (%1), %%r8 ;" - "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ - "adcx %%r10, %%r9 ;" - "adox 8(%1), %%r9 ;" - "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ - "adcx %%r11, %%r10 ;" - "adox 16(%1), %%r10 ;" - "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ - "adcx %%rax, %%r11 ;" - "adox 24(%1), %%r11 ;" - /***************************************/ - "adcx %%rbx, %%rcx ;" - "adox %%rbx, %%rcx ;" - "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */ - "adcx %%rcx, %%r8 ;" - "adcx %%rbx, %%r9 ;" - "movq %%r9, 8(%0) ;" - "adcx %%rbx, %%r10 ;" - "movq %%r10, 16(%0) ;" - "adcx %%rbx, %%r11 ;" - "movq %%r11, 24(%0) ;" - "mov $0, %%ecx ;" - "cmovc %%edx, %%ecx ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, (%0) ;" - : - : "r"(c), "r"(a) - : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", - "%r10", "%r11"); -} - -static void red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) -{ - asm volatile( - "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */ - "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */ - "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */ - "addq %%r10, %%r9 ;" - "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */ - "adcq %%r11, %%r10 ;" - "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */ - "adcq %%rax, %%r11 ;" - /***************************************/ - "adcq $0, %%rcx ;" - "addq (%1), %%r8 ;" - "adcq 8(%1), %%r9 ;" - "adcq 16(%1), %%r10 ;" - "adcq 24(%1), %%r11 ;" - "adcq $0, %%rcx ;" - "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */ - "addq %%rcx, %%r8 ;" - "adcq $0, %%r9 ;" - "movq %%r9, 8(%0) ;" - "adcq $0, %%r10 ;" - "movq %%r10, 16(%0) ;" - "adcq $0, %%r11 ;" - "movq %%r11, 24(%0) ;" - "mov $0, %%ecx ;" - "cmovc %%edx, %%ecx ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, (%0) ;" - : - : "r"(c), "r"(a) - : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", - "%r11"); -} - -static __always_inline void -add_eltfp25519_1w_adx(u64 *const c, const u64 *const a, const u64 *const b) -{ - asm volatile( - "mov $38, %%eax ;" - "xorl %%ecx, %%ecx ;" - "movq (%2), %%r8 ;" - "adcx (%1), %%r8 ;" - "movq 8(%2), 
%%r9 ;" - "adcx 8(%1), %%r9 ;" - "movq 16(%2), %%r10 ;" - "adcx 16(%1), %%r10 ;" - "movq 24(%2), %%r11 ;" - "adcx 24(%1), %%r11 ;" - "cmovc %%eax, %%ecx ;" - "xorl %%eax, %%eax ;" - "adcx %%rcx, %%r8 ;" - "adcx %%rax, %%r9 ;" - "movq %%r9, 8(%0) ;" - "adcx %%rax, %%r10 ;" - "movq %%r10, 16(%0) ;" - "adcx %%rax, %%r11 ;" - "movq %%r11, 24(%0) ;" - "mov $38, %%ecx ;" - "cmovc %%ecx, %%eax ;" - "addq %%rax, %%r8 ;" - "movq %%r8, (%0) ;" - : - : "r"(c), "r"(a), "r"(b) - : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); -} - -static __always_inline void -add_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a, const u64 *const b) -{ - asm volatile( - "mov $38, %%eax ;" - "movq (%2), %%r8 ;" - "addq (%1), %%r8 ;" - "movq 8(%2), %%r9 ;" - "adcq 8(%1), %%r9 ;" - "movq 16(%2), %%r10 ;" - "adcq 16(%1), %%r10 ;" - "movq 24(%2), %%r11 ;" - "adcq 24(%1), %%r11 ;" - "mov $0, %%ecx ;" - "cmovc %%eax, %%ecx ;" - "addq %%rcx, %%r8 ;" - "adcq $0, %%r9 ;" - "movq %%r9, 8(%0) ;" - "adcq $0, %%r10 ;" - "movq %%r10, 16(%0) ;" - "adcq $0, %%r11 ;" - "movq %%r11, 24(%0) ;" - "mov $0, %%ecx ;" - "cmovc %%eax, %%ecx ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, (%0) ;" - : - : "r"(c), "r"(a), "r"(b) - : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); -} - -static __always_inline void -sub_eltfp25519_1w(u64 *const c, const u64 *const a, const u64 *const b) -{ - asm volatile( - "mov $38, %%eax ;" - "movq (%1), %%r8 ;" - "subq (%2), %%r8 ;" - "movq 8(%1), %%r9 ;" - "sbbq 8(%2), %%r9 ;" - "movq 16(%1), %%r10 ;" - "sbbq 16(%2), %%r10 ;" - "movq 24(%1), %%r11 ;" - "sbbq 24(%2), %%r11 ;" - "mov $0, %%ecx ;" - "cmovc %%eax, %%ecx ;" - "subq %%rcx, %%r8 ;" - "sbbq $0, %%r9 ;" - "movq %%r9, 8(%0) ;" - "sbbq $0, %%r10 ;" - "movq %%r10, 16(%0) ;" - "sbbq $0, %%r11 ;" - "movq %%r11, 24(%0) ;" - "mov $0, %%ecx ;" - "cmovc %%eax, %%ecx ;" - "subq %%rcx, %%r8 ;" - "movq %%r8, (%0) ;" - : - : "r"(c), "r"(a), "r"(b) - : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11"); -} - -/* Multiplication by a24 = (A+2)/4 = (486662+2)/4 = 121666 */ -static __always_inline void -mul_a24_eltfp25519_1w(u64 *const c, const u64 *const a) -{ - const u64 a24 = 121666; - asm volatile( - "movq %2, %%rdx ;" - "mulx (%1), %%r8, %%r10 ;" - "mulx 8(%1), %%r9, %%r11 ;" - "addq %%r10, %%r9 ;" - "mulx 16(%1), %%r10, %%rax ;" - "adcq %%r11, %%r10 ;" - "mulx 24(%1), %%r11, %%rcx ;" - "adcq %%rax, %%r11 ;" - /**************************/ - "adcq $0, %%rcx ;" - "movl $38, %%edx ;" /* 2*c = 38 = 2^256 mod 2^255-19*/ - "imul %%rdx, %%rcx ;" - "addq %%rcx, %%r8 ;" - "adcq $0, %%r9 ;" - "movq %%r9, 8(%0) ;" - "adcq $0, %%r10 ;" - "movq %%r10, 16(%0) ;" - "adcq $0, %%r11 ;" - "movq %%r11, 24(%0) ;" - "mov $0, %%ecx ;" - "cmovc %%edx, %%ecx ;" - "addq %%rcx, %%r8 ;" - "movq %%r8, (%0) ;" - : - : "r"(c), "r"(a), "r"(a24) - : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", - "%r11"); -} - -static void inv_eltfp25519_1w_adx(u64 *const c, const u64 *const a) -{ - struct { - eltfp25519_1w_buffer buffer; - eltfp25519_1w x0, x1, x2; - } __aligned(32) m; - u64 *T[4]; - - T[0] = m.x0; - T[1] = c; /* x^(-1) */ - T[2] = m.x1; - T[3] = m.x2; - - copy_eltfp25519_1w(T[1], a); - sqrn_eltfp25519_1w_adx(T[1], 1); - copy_eltfp25519_1w(T[2], T[1]); - sqrn_eltfp25519_1w_adx(T[2], 2); - mul_eltfp25519_1w_adx(T[0], a, T[2]); - mul_eltfp25519_1w_adx(T[1], T[1], T[0]); - copy_eltfp25519_1w(T[2], T[1]); - sqrn_eltfp25519_1w_adx(T[2], 1); - mul_eltfp25519_1w_adx(T[0], T[0], T[2]); - copy_eltfp25519_1w(T[2], T[0]); - sqrn_eltfp25519_1w_adx(T[2], 
5); - mul_eltfp25519_1w_adx(T[0], T[0], T[2]); - copy_eltfp25519_1w(T[2], T[0]); - sqrn_eltfp25519_1w_adx(T[2], 10); - mul_eltfp25519_1w_adx(T[2], T[2], T[0]); - copy_eltfp25519_1w(T[3], T[2]); - sqrn_eltfp25519_1w_adx(T[3], 20); - mul_eltfp25519_1w_adx(T[3], T[3], T[2]); - sqrn_eltfp25519_1w_adx(T[3], 10); - mul_eltfp25519_1w_adx(T[3], T[3], T[0]); - copy_eltfp25519_1w(T[0], T[3]); - sqrn_eltfp25519_1w_adx(T[0], 50); - mul_eltfp25519_1w_adx(T[0], T[0], T[3]); - copy_eltfp25519_1w(T[2], T[0]); - sqrn_eltfp25519_1w_adx(T[2], 100); - mul_eltfp25519_1w_adx(T[2], T[2], T[0]); - sqrn_eltfp25519_1w_adx(T[2], 50); - mul_eltfp25519_1w_adx(T[2], T[2], T[3]); - sqrn_eltfp25519_1w_adx(T[2], 5); - mul_eltfp25519_1w_adx(T[1], T[1], T[2]); - - memzero_explicit(&m, sizeof(m)); -} - -static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a) -{ - struct { - eltfp25519_1w_buffer buffer; - eltfp25519_1w x0, x1, x2; - } __aligned(32) m; - u64 *T[5]; - - T[0] = m.x0; - T[1] = c; /* x^(-1) */ - T[2] = m.x1; - T[3] = m.x2; - - copy_eltfp25519_1w(T[1], a); - sqrn_eltfp25519_1w_bmi2(T[1], 1); - copy_eltfp25519_1w(T[2], T[1]); - sqrn_eltfp25519_1w_bmi2(T[2], 2); - mul_eltfp25519_1w_bmi2(T[0], a, T[2]); - mul_eltfp25519_1w_bmi2(T[1], T[1], T[0]); - copy_eltfp25519_1w(T[2], T[1]); - sqrn_eltfp25519_1w_bmi2(T[2], 1); - mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]); - copy_eltfp25519_1w(T[2], T[0]); - sqrn_eltfp25519_1w_bmi2(T[2], 5); - mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]); - copy_eltfp25519_1w(T[2], T[0]); - sqrn_eltfp25519_1w_bmi2(T[2], 10); - mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]); - copy_eltfp25519_1w(T[3], T[2]); - sqrn_eltfp25519_1w_bmi2(T[3], 20); - mul_eltfp25519_1w_bmi2(T[3], T[3], T[2]); - sqrn_eltfp25519_1w_bmi2(T[3], 10); - mul_eltfp25519_1w_bmi2(T[3], T[3], T[0]); - copy_eltfp25519_1w(T[0], T[3]); - sqrn_eltfp25519_1w_bmi2(T[0], 50); - mul_eltfp25519_1w_bmi2(T[0], T[0], T[3]); - copy_eltfp25519_1w(T[2], T[0]); - sqrn_eltfp25519_1w_bmi2(T[2], 100); - mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]); - sqrn_eltfp25519_1w_bmi2(T[2], 50); - mul_eltfp25519_1w_bmi2(T[2], T[2], T[3]); - sqrn_eltfp25519_1w_bmi2(T[2], 5); - mul_eltfp25519_1w_bmi2(T[1], T[1], T[2]); - - memzero_explicit(&m, sizeof(m)); -} - -/* Given c, a 256-bit number, fred_eltfp25519_1w updates c - * with a number such that 0 <= C < 2**255-19. - */ -static __always_inline void fred_eltfp25519_1w(u64 *const c) -{ - u64 tmp0 = 38, tmp1 = 19; - asm volatile( - "btrq $63, %3 ;" /* Put bit 255 in carry flag and clear */ - "cmovncl %k5, %k4 ;" /* c[255] ? 38 : 19 */ - - /* Add either 19 or 38 to c */ - "addq %4, %0 ;" - "adcq $0, %1 ;" - "adcq $0, %2 ;" - "adcq $0, %3 ;" - - /* Test for bit 255 again; only triggered on overflow modulo 2^255-19 */ - "movl $0, %k4 ;" - "cmovnsl %k5, %k4 ;" /* c[255] ? 
0 : 19 */ - "btrq $63, %3 ;" /* Clear bit 255 */ - - /* Subtract 19 if necessary */ - "subq %4, %0 ;" - "sbbq $0, %1 ;" - "sbbq $0, %2 ;" - "sbbq $0, %3 ;" - - : "+r"(c[0]), "+r"(c[1]), "+r"(c[2]), "+r"(c[3]), "+r"(tmp0), - "+r"(tmp1) - : - : "memory", "cc"); -} - -static __always_inline void cswap(u8 bit, u64 *const px, u64 *const py) -{ - u64 temp; - asm volatile( - "test %9, %9 ;" - "movq %0, %8 ;" - "cmovnzq %4, %0 ;" - "cmovnzq %8, %4 ;" - "movq %1, %8 ;" - "cmovnzq %5, %1 ;" - "cmovnzq %8, %5 ;" - "movq %2, %8 ;" - "cmovnzq %6, %2 ;" - "cmovnzq %8, %6 ;" - "movq %3, %8 ;" - "cmovnzq %7, %3 ;" - "cmovnzq %8, %7 ;" - : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]), - "+r"(py[0]), "+r"(py[1]), "+r"(py[2]), "+r"(py[3]), - "=r"(temp) - : "r"(bit) - : "cc" - ); -} - -static __always_inline void cselect(u8 bit, u64 *const px, const u64 *const py) -{ - asm volatile( - "test %4, %4 ;" - "cmovnzq %5, %0 ;" - "cmovnzq %6, %1 ;" - "cmovnzq %7, %2 ;" - "cmovnzq %8, %3 ;" - : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]) - : "r"(bit), "rm"(py[0]), "rm"(py[1]), "rm"(py[2]), "rm"(py[3]) - : "cc" - ); -} - -static void curve25519_adx(u8 shared[CURVE25519_KEY_SIZE], - const u8 private_key[CURVE25519_KEY_SIZE], - const u8 session_key[CURVE25519_KEY_SIZE]) -{ - struct { - u64 buffer[4 * NUM_WORDS_ELTFP25519]; - u64 coordinates[4 * NUM_WORDS_ELTFP25519]; - u64 workspace[6 * NUM_WORDS_ELTFP25519]; - u8 session[CURVE25519_KEY_SIZE]; - u8 private[CURVE25519_KEY_SIZE]; - } __aligned(32) m; - - int i = 0, j = 0; - u64 prev = 0; - u64 *const X1 = (u64 *)m.session; - u64 *const key = (u64 *)m.private; - u64 *const Px = m.coordinates + 0; - u64 *const Pz = m.coordinates + 4; - u64 *const Qx = m.coordinates + 8; - u64 *const Qz = m.coordinates + 12; - u64 *const X2 = Qx; - u64 *const Z2 = Qz; - u64 *const X3 = Px; - u64 *const Z3 = Pz; - u64 *const X2Z2 = Qx; - u64 *const X3Z3 = Px; - - u64 *const A = m.workspace + 0; - u64 *const B = m.workspace + 4; - u64 *const D = m.workspace + 8; - u64 *const C = m.workspace + 12; - u64 *const DA = m.workspace + 16; - u64 *const CB = m.workspace + 20; - u64 *const AB = A; - u64 *const DC = D; - u64 *const DACB = DA; - - memcpy(m.private, private_key, sizeof(m.private)); - memcpy(m.session, session_key, sizeof(m.session)); - - curve25519_clamp_secret(m.private); - - /* As in the draft: - * When receiving such an array, implementations of curve25519 - * MUST mask the most-significant bit in the final byte. 
This - * is done to preserve compatibility with point formats which - * reserve the sign bit for use in other protocols and to - * increase resistance to implementation fingerprinting - */ - m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1; - - copy_eltfp25519_1w(Px, X1); - setzero_eltfp25519_1w(Pz); - setzero_eltfp25519_1w(Qx); - setzero_eltfp25519_1w(Qz); - - Pz[0] = 1; - Qx[0] = 1; - - /* main-loop */ - prev = 0; - j = 62; - for (i = 3; i >= 0; --i) { - while (j >= 0) { - u64 bit = (key[i] >> j) & 0x1; - u64 swap = bit ^ prev; - prev = bit; - - add_eltfp25519_1w_adx(A, X2, Z2); /* A = (X2+Z2) */ - sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */ - add_eltfp25519_1w_adx(C, X3, Z3); /* C = (X3+Z3) */ - sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */ - mul_eltfp25519_2w_adx(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */ - - cselect(swap, A, C); - cselect(swap, B, D); - - sqr_eltfp25519_2w_adx(AB); /* [AA|BB] = [A^2|B^2] */ - add_eltfp25519_1w_adx(X3, DA, CB); /* X3 = (DA+CB) */ - sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */ - sqr_eltfp25519_2w_adx(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */ - - copy_eltfp25519_1w(X2, B); /* X2 = B^2 */ - sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */ - - mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */ - add_eltfp25519_1w_adx(B, B, X2); /* B = a24*E+B */ - mul_eltfp25519_2w_adx(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */ - mul_eltfp25519_1w_adx(Z3, Z3, X1); /* Z3 = Z3*X1 */ - --j; - } - j = 63; - } - - inv_eltfp25519_1w_adx(A, Qz); - mul_eltfp25519_1w_adx((u64 *)shared, Qx, A); - fred_eltfp25519_1w((u64 *)shared); - - memzero_explicit(&m, sizeof(m)); -} - -static void curve25519_adx_base(u8 session_key[CURVE25519_KEY_SIZE], - const u8 private_key[CURVE25519_KEY_SIZE]) -{ - struct { - u64 buffer[4 * NUM_WORDS_ELTFP25519]; - u64 coordinates[4 * NUM_WORDS_ELTFP25519]; - u64 workspace[4 * NUM_WORDS_ELTFP25519]; - u8 private[CURVE25519_KEY_SIZE]; - } __aligned(32) m; - - const int ite[4] = { 64, 64, 64, 63 }; - const int q = 3; u64 swap = 1; + int i, j, k; + u64 tmp[16 + 32 + 4]; + u64 *x1 = &tmp[0]; + u64 *z1 = &tmp[4]; + u64 *x2 = &tmp[8]; + u64 *z2 = &tmp[12]; + u64 *xz1 = &tmp[0]; + u64 *xz2 = &tmp[8]; + u64 *a = &tmp[0 + 16]; + u64 *b = &tmp[4 + 16]; + u64 *c = &tmp[8 + 16]; + u64 *ab = &tmp[0 + 16]; + u64 *abcd = &tmp[0 + 16]; + u64 *ef = &tmp[16 + 16]; + u64 *efgh = &tmp[16 + 16]; + u64 *key = &tmp[0 + 16 + 32]; - int i = 0, j = 0, k = 0; - u64 *const key = (u64 *)m.private; - u64 *const Ur1 = m.coordinates + 0; - u64 *const Zr1 = m.coordinates + 4; - u64 *const Ur2 = m.coordinates + 8; - u64 *const Zr2 = m.coordinates + 12; + memcpy(key, priv, 32); + ((u8 *)key)[0] &= 248; + ((u8 *)key)[31] = (((u8 *)key)[31] & 127) | 64; - u64 *const UZr1 = m.coordinates + 0; - u64 *const ZUr2 = m.coordinates + 8; + x1[0] = 1, x1[1] = x1[2] = x1[3] = 0; + z1[0] = 1, z1[1] = z1[2] = z1[3] = 0; + z2[0] = 1, z2[1] = z2[2] = z2[3] = 0; + memcpy(x2, p_minus_s, sizeof(p_minus_s)); - u64 *const A = m.workspace + 0; - u64 *const B = m.workspace + 4; - u64 *const C = m.workspace + 8; - u64 *const D = m.workspace + 12; - - u64 *const AB = m.workspace + 0; - u64 *const CD = m.workspace + 8; - - const u64 *const P = table_ladder_8k; - - memcpy(m.private, private_key, sizeof(m.private)); - - curve25519_clamp_secret(m.private); - - setzero_eltfp25519_1w(Ur1); - setzero_eltfp25519_1w(Zr1); - setzero_eltfp25519_1w(Zr2); - Ur1[0] = 1; - Zr1[0] = 1; - Zr2[0] = 1; - - /* G-S */ - Ur2[3] = 0x1eaecdeee27cab34UL; - Ur2[2] = 0xadc7a0b9235d48e2UL; - Ur2[1] = 
0xbbf095ae14b2edf8UL; - Ur2[0] = 0x7e94e1fec82faabdUL; - - /* main-loop */ - j = q; - for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) { - while (j < ite[i]) { - u64 bit = (key[i] >> j) & 0x1; - k = (64 * i + j - q); + j = 3; + for (i = 0; i < 4; ++i) { + while (j < (const int[]){ 64, 64, 64, 63 }[i]) { + u64 bit = (key[i] >> j) & 1; + k = (64 * i + j - 3); swap = swap ^ bit; - cswap(swap, Ur1, Ur2); - cswap(swap, Zr1, Zr2); + cswap2(swap, xz1, xz2); swap = bit; - /* Addition */ - sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ - add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */ - mul_eltfp25519_1w_adx(C, &P[4 * k], B); /* C = M0-B */ - sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */ - add_eltfp25519_1w_adx(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */ - sqr_eltfp25519_2w_adx(AB); /* A = A^2 | B = B^2 */ - mul_eltfp25519_2w_adx(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */ + fsub(b, x1, z1); + fadd(a, x1, z1); + fmul(c, &table_ladder[4 * k], b, ef); + fsub(b, a, c); + fadd(a, a, c); + fsqr2(ab, ab, efgh); + fmul2(xz1, xz2, ab, efgh); ++j; } j = 0; } - /* Doubling */ - for (i = 0; i < q; ++i) { - add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */ - sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ - sqr_eltfp25519_2w_adx(AB); /* A = A**2 B = B**2 */ - copy_eltfp25519_1w(C, B); /* C = B */ - sub_eltfp25519_1w(B, A, B); /* B = A-B */ - mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */ - add_eltfp25519_1w_adx(D, D, C); /* D = D+C */ - mul_eltfp25519_2w_adx(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */ - } + point_double(xz1, abcd, efgh); + point_double(xz1, abcd, efgh); + point_double(xz1, abcd, efgh); + encode_point(out, xz1); - /* Convert to affine coordinates */ - inv_eltfp25519_1w_adx(A, Zr1); - mul_eltfp25519_1w_adx((u64 *)session_key, Ur1, A); - fred_eltfp25519_1w((u64 *)session_key); - - memzero_explicit(&m, sizeof(m)); + memzero_explicit(tmp, sizeof(tmp)); } -static void curve25519_bmi2(u8 shared[CURVE25519_KEY_SIZE], - const u8 private_key[CURVE25519_KEY_SIZE], - const u8 session_key[CURVE25519_KEY_SIZE]) -{ - struct { - u64 buffer[4 * NUM_WORDS_ELTFP25519]; - u64 coordinates[4 * NUM_WORDS_ELTFP25519]; - u64 workspace[6 * NUM_WORDS_ELTFP25519]; - u8 session[CURVE25519_KEY_SIZE]; - u8 private[CURVE25519_KEY_SIZE]; - } __aligned(32) m; - - int i = 0, j = 0; - u64 prev = 0; - u64 *const X1 = (u64 *)m.session; - u64 *const key = (u64 *)m.private; - u64 *const Px = m.coordinates + 0; - u64 *const Pz = m.coordinates + 4; - u64 *const Qx = m.coordinates + 8; - u64 *const Qz = m.coordinates + 12; - u64 *const X2 = Qx; - u64 *const Z2 = Qz; - u64 *const X3 = Px; - u64 *const Z3 = Pz; - u64 *const X2Z2 = Qx; - u64 *const X3Z3 = Px; - - u64 *const A = m.workspace + 0; - u64 *const B = m.workspace + 4; - u64 *const D = m.workspace + 8; - u64 *const C = m.workspace + 12; - u64 *const DA = m.workspace + 16; - u64 *const CB = m.workspace + 20; - u64 *const AB = A; - u64 *const DC = D; - u64 *const DACB = DA; - - memcpy(m.private, private_key, sizeof(m.private)); - memcpy(m.session, session_key, sizeof(m.session)); - - curve25519_clamp_secret(m.private); - - /* As in the draft: - * When receiving such an array, implementations of curve25519 - * MUST mask the most-significant bit in the final byte. 
This - * is done to preserve compatibility with point formats which - * reserve the sign bit for use in other protocols and to - * increase resistance to implementation fingerprinting - */ - m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1; - - copy_eltfp25519_1w(Px, X1); - setzero_eltfp25519_1w(Pz); - setzero_eltfp25519_1w(Qx); - setzero_eltfp25519_1w(Qz); - - Pz[0] = 1; - Qx[0] = 1; - - /* main-loop */ - prev = 0; - j = 62; - for (i = 3; i >= 0; --i) { - while (j >= 0) { - u64 bit = (key[i] >> j) & 0x1; - u64 swap = bit ^ prev; - prev = bit; - - add_eltfp25519_1w_bmi2(A, X2, Z2); /* A = (X2+Z2) */ - sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */ - add_eltfp25519_1w_bmi2(C, X3, Z3); /* C = (X3+Z3) */ - sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */ - mul_eltfp25519_2w_bmi2(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */ - - cselect(swap, A, C); - cselect(swap, B, D); - - sqr_eltfp25519_2w_bmi2(AB); /* [AA|BB] = [A^2|B^2] */ - add_eltfp25519_1w_bmi2(X3, DA, CB); /* X3 = (DA+CB) */ - sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */ - sqr_eltfp25519_2w_bmi2(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */ - - copy_eltfp25519_1w(X2, B); /* X2 = B^2 */ - sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */ - - mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */ - add_eltfp25519_1w_bmi2(B, B, X2); /* B = a24*E+B */ - mul_eltfp25519_2w_bmi2(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */ - mul_eltfp25519_1w_bmi2(Z3, Z3, X1); /* Z3 = Z3*X1 */ - --j; - } - j = 63; - } - - inv_eltfp25519_1w_bmi2(A, Qz); - mul_eltfp25519_1w_bmi2((u64 *)shared, Qx, A); - fred_eltfp25519_1w((u64 *)shared); - - memzero_explicit(&m, sizeof(m)); -} - -static void curve25519_bmi2_base(u8 session_key[CURVE25519_KEY_SIZE], - const u8 private_key[CURVE25519_KEY_SIZE]) -{ - struct { - u64 buffer[4 * NUM_WORDS_ELTFP25519]; - u64 coordinates[4 * NUM_WORDS_ELTFP25519]; - u64 workspace[4 * NUM_WORDS_ELTFP25519]; - u8 private[CURVE25519_KEY_SIZE]; - } __aligned(32) m; - - const int ite[4] = { 64, 64, 64, 63 }; - const int q = 3; - u64 swap = 1; - - int i = 0, j = 0, k = 0; - u64 *const key = (u64 *)m.private; - u64 *const Ur1 = m.coordinates + 0; - u64 *const Zr1 = m.coordinates + 4; - u64 *const Ur2 = m.coordinates + 8; - u64 *const Zr2 = m.coordinates + 12; - - u64 *const UZr1 = m.coordinates + 0; - u64 *const ZUr2 = m.coordinates + 8; - - u64 *const A = m.workspace + 0; - u64 *const B = m.workspace + 4; - u64 *const C = m.workspace + 8; - u64 *const D = m.workspace + 12; - - u64 *const AB = m.workspace + 0; - u64 *const CD = m.workspace + 8; - - const u64 *const P = table_ladder_8k; - - memcpy(m.private, private_key, sizeof(m.private)); - - curve25519_clamp_secret(m.private); - - setzero_eltfp25519_1w(Ur1); - setzero_eltfp25519_1w(Zr1); - setzero_eltfp25519_1w(Zr2); - Ur1[0] = 1; - Zr1[0] = 1; - Zr2[0] = 1; - - /* G-S */ - Ur2[3] = 0x1eaecdeee27cab34UL; - Ur2[2] = 0xadc7a0b9235d48e2UL; - Ur2[1] = 0xbbf095ae14b2edf8UL; - Ur2[0] = 0x7e94e1fec82faabdUL; - - /* main-loop */ - j = q; - for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) { - while (j < ite[i]) { - u64 bit = (key[i] >> j) & 0x1; - k = (64 * i + j - q); - swap = swap ^ bit; - cswap(swap, Ur1, Ur2); - cswap(swap, Zr1, Zr2); - swap = bit; - /* Addition */ - sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ - add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */ - mul_eltfp25519_1w_bmi2(C, &P[4 * k], B);/* C = M0-B */ - sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */ - add_eltfp25519_1w_bmi2(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */ - sqr_eltfp25519_2w_bmi2(AB); /* A 
= A^2 | B = B^2 */ - mul_eltfp25519_2w_bmi2(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */ - ++j; - } - j = 0; - } - - /* Doubling */ - for (i = 0; i < q; ++i) { - add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */ - sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */ - sqr_eltfp25519_2w_bmi2(AB); /* A = A**2 B = B**2 */ - copy_eltfp25519_1w(C, B); /* C = B */ - sub_eltfp25519_1w(B, A, B); /* B = A-B */ - mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */ - add_eltfp25519_1w_bmi2(D, D, C); /* D = D+C */ - mul_eltfp25519_2w_bmi2(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */ - } - - /* Convert to affine coordinates */ - inv_eltfp25519_1w_bmi2(A, Zr1); - mul_eltfp25519_1w_bmi2((u64 *)session_key, Ur1, A); - fred_eltfp25519_1w((u64 *)session_key); - - memzero_explicit(&m, sizeof(m)); -} +static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2_adx); void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE], const u8 basepoint[CURVE25519_KEY_SIZE]) { - if (static_branch_likely(&curve25519_use_adx)) - curve25519_adx(mypublic, secret, basepoint); - else if (static_branch_likely(&curve25519_use_bmi2)) - curve25519_bmi2(mypublic, secret, basepoint); + if (static_branch_likely(&curve25519_use_bmi2_adx)) + curve25519_ever64(mypublic, secret, basepoint); else curve25519_generic(mypublic, secret, basepoint); } @@ -2355,10 +1395,8 @@ EXPORT_SYMBOL(curve25519_arch); void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE]) { - if (static_branch_likely(&curve25519_use_adx)) - curve25519_adx_base(pub, secret); - else if (static_branch_likely(&curve25519_use_bmi2)) - curve25519_bmi2_base(pub, secret); + if (static_branch_likely(&curve25519_use_bmi2_adx)) + curve25519_ever64_base(pub, secret); else curve25519_generic(pub, secret, curve25519_base_point); } @@ -2449,12 +1487,11 @@ static struct kpp_alg curve25519_alg = { .max_size = curve25519_max_size, }; + static int __init curve25519_mod_init(void) { - if (boot_cpu_has(X86_FEATURE_BMI2)) - static_branch_enable(&curve25519_use_bmi2); - else if (boot_cpu_has(X86_FEATURE_ADX)) - static_branch_enable(&curve25519_use_adx); + if (boot_cpu_has(X86_FEATURE_BMI2) && boot_cpu_has(X86_FEATURE_ADX)) + static_branch_enable(&curve25519_use_bmi2_adx); else return 0; return IS_REACHABLE(CONFIG_CRYPTO_KPP) ? @@ -2474,3 +1511,4 @@ module_exit(curve25519_mod_exit); MODULE_ALIAS_CRYPTO("curve25519"); MODULE_ALIAS_CRYPTO("curve25519-x86"); MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Jason A. Donenfeld "); From ce0fc6db38decf0d2919bfe783de6d6b76e421a9 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Wed, 29 Jan 2020 16:37:54 +0200 Subject: [PATCH 007/162] crypto: ccree - protect against empty or NULL scatterlists Deal gracefully with a NULL or empty scatterlist which can happen if both cryptlen and assoclen are zero and we're doing in-place AEAD encryption. This fixes a crash when this causes us to try and map a NULL page, at least with some platforms / DMA mapping configs. 
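As a rough sketch of the failure mode and of the guard this patch establishes (illustrative only -- the helper below is hypothetical and not the driver's actual mapping routine, though dma_map_sg() and sg_nents_for_len() are the real kernel APIs):

    #include <linux/dma-mapping.h>
    #include <linux/scatterlist.h>

    /*
     * With an in-place AEAD request where both cryptlen and assoclen
     * are zero, req->src is an empty scatterlist. Counting the
     * entries first and bailing out lets us avoid handing a
     * zero-length list to dma_map_sg(), which is what previously led
     * to mapping a NULL page on some platforms.
     */
    static int map_req_sg(struct device *dev, struct scatterlist *sg,
                          unsigned int nbytes, u32 *nents)
    {
            int n;

            *nents = 0;
            if (!sg || !nbytes)     /* empty/NULL sgl: nothing to map */
                    return 0;

            n = sg_nents_for_len(sg, nbytes);
            if (n < 0)
                    return n;

            if (dma_map_sg(dev, sg, n, DMA_BIDIRECTIONAL) == 0)
                    return -ENOMEM;

            *nents = n;
            return 0;
    }

The actual fix below additionally zero-initializes *lbytes in cc_get_sgl_nents() and guards the skip loops on the mapped-nents counts, so a zero-entry mapping cannot walk past the end of the list.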
Cc: stable@vger.kernel.org # v4.19+ Reported-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Signed-off-by: Gilad Ben-Yossef Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_buffer_mgr.c | 62 ++++++++++++---------------- drivers/crypto/ccree/cc_buffer_mgr.h | 1 + 2 files changed, 28 insertions(+), 35 deletions(-) diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index a72586eccd81..b938ceae7ae7 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -87,6 +87,8 @@ static unsigned int cc_get_sgl_nents(struct device *dev, { unsigned int nents = 0; + *lbytes = 0; + while (nbytes && sg_list) { nents++; /* get the number of bytes in the last entry */ @@ -95,6 +97,7 @@ static unsigned int cc_get_sgl_nents(struct device *dev, nbytes : sg_list->length; sg_list = sg_next(sg_list); } + dev_dbg(dev, "nents %d last bytes %d\n", nents, *lbytes); return nents; } @@ -290,37 +293,25 @@ static int cc_map_sg(struct device *dev, struct scatterlist *sg, unsigned int nbytes, int direction, u32 *nents, u32 max_sg_nents, u32 *lbytes, u32 *mapped_nents) { - if (sg_is_last(sg)) { - /* One entry only case -set to DLLI */ - if (dma_map_sg(dev, sg, 1, direction) != 1) { - dev_err(dev, "dma_map_sg() single buffer failed\n"); - return -ENOMEM; - } - dev_dbg(dev, "Mapped sg: dma_address=%pad page=%p addr=%pK offset=%u length=%u\n", - &sg_dma_address(sg), sg_page(sg), sg_virt(sg), - sg->offset, sg->length); - *lbytes = nbytes; - *nents = 1; - *mapped_nents = 1; - } else { /*sg_is_last*/ - *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes); - if (*nents > max_sg_nents) { - *nents = 0; - dev_err(dev, "Too many fragments. current %d max %d\n", - *nents, max_sg_nents); - return -ENOMEM; - } - /* In case of mmu the number of mapped nents might - * be changed from the original sgl nents - */ - *mapped_nents = dma_map_sg(dev, sg, *nents, direction); - if (*mapped_nents == 0) { - *nents = 0; - dev_err(dev, "dma_map_sg() sg buffer failed\n"); - return -ENOMEM; - } + int ret = 0; + + *nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes); + if (*nents > max_sg_nents) { + *nents = 0; + dev_err(dev, "Too many fragments. 
current %d max %d\n", + *nents, max_sg_nents); + return -ENOMEM; } + ret = dma_map_sg(dev, sg, *nents, direction); + if (dma_mapping_error(dev, ret)) { + *nents = 0; + dev_err(dev, "dma_map_sg() sg buffer failed %d\n", ret); + return -ENOMEM; + } + + *mapped_nents = ret; + return 0; } @@ -555,11 +546,12 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req) sg_virt(req->src), areq_ctx->src.nents, areq_ctx->assoc.nents, areq_ctx->assoclen, req->cryptlen); - dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_BIDIRECTIONAL); + dma_unmap_sg(dev, req->src, areq_ctx->src.mapped_nents, + DMA_BIDIRECTIONAL); if (req->src != req->dst) { dev_dbg(dev, "Unmapping dst sgl: req->dst=%pK\n", sg_virt(req->dst)); - dma_unmap_sg(dev, req->dst, sg_nents(req->dst), + dma_unmap_sg(dev, req->dst, areq_ctx->dst.mapped_nents, DMA_BIDIRECTIONAL); } if (drvdata->coherent && @@ -881,7 +873,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, &src_last_bytes); sg_index = areq_ctx->src_sgl->length; //check where the data starts - while (sg_index <= size_to_skip) { + while (src_mapped_nents && (sg_index <= size_to_skip)) { src_mapped_nents--; offset -= areq_ctx->src_sgl->length; sgl = sg_next(areq_ctx->src_sgl); @@ -908,7 +900,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, size_for_map += crypto_aead_ivsize(tfm); rc = cc_map_sg(dev, req->dst, size_for_map, DMA_BIDIRECTIONAL, - &areq_ctx->dst.nents, + &areq_ctx->dst.mapped_nents, LLI_MAX_NUM_OF_DATA_ENTRIES, &dst_last_bytes, &dst_mapped_nents); if (rc) @@ -921,7 +913,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, offset = size_to_skip; //check where the data starts - while (sg_index <= size_to_skip) { + while (dst_mapped_nents && sg_index <= size_to_skip) { dst_mapped_nents--; offset -= areq_ctx->dst_sgl->length; sgl = sg_next(areq_ctx->dst_sgl); @@ -1123,7 +1115,7 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) if (is_gcm4543) size_to_map += crypto_aead_ivsize(tfm); rc = cc_map_sg(dev, req->src, size_to_map, DMA_BIDIRECTIONAL, - &areq_ctx->src.nents, + &areq_ctx->src.mapped_nents, (LLI_MAX_NUM_OF_ASSOC_DATA_ENTRIES + LLI_MAX_NUM_OF_DATA_ENTRIES), &dummy, &mapped_nents); diff --git a/drivers/crypto/ccree/cc_buffer_mgr.h b/drivers/crypto/ccree/cc_buffer_mgr.h index af434872c6ff..827b6cb1236e 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.h +++ b/drivers/crypto/ccree/cc_buffer_mgr.h @@ -25,6 +25,7 @@ enum cc_sg_cpy_direct { struct cc_mlli { cc_sram_addr_t sram_addr; + unsigned int mapped_nents; unsigned int nents; //sg nents unsigned int mlli_nents; //mlli nents might be different than the above }; From 504e84abec7a635b861afd8d7f92ecd13eaa2b09 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Wed, 29 Jan 2020 16:37:55 +0200 Subject: [PATCH 008/162] crypto: ccree - only try to map auth tag if needed Make sure to only add the size of the auth tag to the source mapping for encryption if it is an in-place operation. Failing to do this previously caused us to try and map auth size len bytes from a NULL mapping and crashing if both the cryptlen and assoclen are zero. 
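Restated in isolation (a simplified sketch; the names mirror the driver's), the corrected source-length computation only accounts for the tag when it will actually be written back through the source scatterlist:

	/* The auth tag lives in req->src only for in-place encryption. */
	size_to_map = req->cryptlen + areq_ctx->assoclen;
	if (areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_ENCRYPT &&
	    req->src == req->dst)
		size_to_map += authsize;	/* tag is produced into src */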
Reported-by: Geert Uytterhoeven
Tested-by: Geert Uytterhoeven
Signed-off-by: Gilad Ben-Yossef
Cc: stable@vger.kernel.org # v4.19+
Signed-off-by: Herbert Xu
---
 drivers/crypto/ccree/cc_buffer_mgr.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c
index b938ceae7ae7..885347b5b372 100644
--- a/drivers/crypto/ccree/cc_buffer_mgr.c
+++ b/drivers/crypto/ccree/cc_buffer_mgr.c
@@ -1109,9 +1109,11 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req)
 	}

 	size_to_map = req->cryptlen + areq_ctx->assoclen;
-	if (areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_ENCRYPT)
+	/* If we do in-place encryption, we also need the auth tag */
+	if ((areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_ENCRYPT) &&
+	    (req->src == req->dst)) {
 		size_to_map += authsize;
-
+	}
 	if (is_gcm4543)
 		size_to_map += crypto_aead_ivsize(tfm);
 	rc = cc_map_sg(dev, req->src, size_to_map, DMA_BIDIRECTIONAL,

From 4aaefb621e9c79de31f3ba2f3b4777ce910f9de9 Mon Sep 17 00:00:00 2001
From: Gilad Ben-Yossef
Date: Wed, 29 Jan 2020 16:37:56 +0200
Subject: [PATCH 009/162] crypto: ccree - fix some reported cipher block sizes

OFB and CTR mode block sizes were wrongly reported as the underlying
cipher block sizes. Fix them to 1 byte, as these modes turn the block
ciphers into stream ciphers. Also document why our XTS differs from the
generic implementation.

Signed-off-by: Gilad Ben-Yossef
Signed-off-by: Herbert Xu
---
 drivers/crypto/ccree/cc_cipher.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c
index 7d6252d892d7..fc1646ccfe29 100644
--- a/drivers/crypto/ccree/cc_cipher.c
+++ b/drivers/crypto/ccree/cc_cipher.c
@@ -1228,6 +1228,10 @@ static const struct cc_alg_template skcipher_algs[] = {
 		.sec_func = true,
 	},
 	{
+		/* See https://www.mail-archive.com/linux-crypto@vger.kernel.org/msg40576.html
+		 * for the reason why this differs from the generic
+		 * implementation.
+		 */
 		.name = "xts(aes)",
 		.driver_name = "xts-aes-ccree",
 		.blocksize = 1,
@@ -1423,7 +1427,7 @@ static const struct cc_alg_template skcipher_algs[] = {
 	{
 		.name = "ofb(aes)",
 		.driver_name = "ofb-aes-ccree",
-		.blocksize = AES_BLOCK_SIZE,
+		.blocksize = 1,
 		.template_skcipher = {
 			.setkey = cc_cipher_setkey,
 			.encrypt = cc_cipher_encrypt,
@@ -1576,7 +1580,7 @@ static const struct cc_alg_template skcipher_algs[] = {
 	{
 		.name = "ctr(sm4)",
 		.driver_name = "ctr-sm4-ccree",
-		.blocksize = SM4_BLOCK_SIZE,
+		.blocksize = 1,
 		.template_skcipher = {
 			.setkey = cc_cipher_setkey,
 			.encrypt = cc_cipher_encrypt,

From 21f802cc9896743c61bacac6cb1de109a2cfc8bc Mon Sep 17 00:00:00 2001
From: Gilad Ben-Yossef
Date: Wed, 29 Jan 2020 16:37:57 +0200
Subject: [PATCH 010/162] crypto: ccree - fix AEAD blocksize registration

Fix an error that caused a block size of zero to be reported for all
AEAD registrations.
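For context, a simplified sketch (not the driver's literal code) of the registration pattern involved: the alg object is zero-allocated and built field by field from the driver template, so any field that is not copied explicitly silently stays zero:

	/* Simplified sketch of template-to-alg field copying. */
	struct aead_alg *alg = &t_alg->aead_alg;

	alg->base.cra_ctxsize   = sizeof(struct cc_aead_ctx);
	alg->base.cra_blocksize = tmpl->blocksize;	/* the missing copy */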
Signed-off-by: Gilad Ben-Yossef Tested-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_aead.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index 2fc0e0da790b..0d95bda4de70 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -2628,6 +2628,7 @@ static struct cc_crypto_alg *cc_create_aead_alg(struct cc_alg_template *tmpl, alg->base.cra_ctxsize = sizeof(struct cc_aead_ctx); alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY; + alg->base.cra_blocksize = tmpl->blocksize; alg->init = cc_aead_init; alg->exit = cc_aead_exit; From 11144416a7557372e64f68c729f3d7cb8890a6d1 Mon Sep 17 00:00:00 2001 From: Valentin Ciocoi Radulescu Date: Fri, 31 Jan 2020 14:15:56 +0000 Subject: [PATCH 011/162] crypto: caam/qi - optimize frame queue cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add reference counter incremented for each frame enqueued in CAAM and replace unconditional sleep in empty_caam_fq() with polling the reference counter. When CONFIG_CRYPTO_MANAGER_EXTRA_TESTS=y boot time on LS1043A platform with this optimization decreases from ~1100s to ~11s. Signed-off-by: Valentin Ciocoi Radulescu Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/qi.c | 60 ++++++++++++++++++++++++++-------------- drivers/crypto/caam/qi.h | 4 ++- 2 files changed, 42 insertions(+), 22 deletions(-) diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c index dacf2fa4aa8e..b390b935db6d 100644 --- a/drivers/crypto/caam/qi.c +++ b/drivers/crypto/caam/qi.c @@ -4,7 +4,7 @@ * Queue Interface backend functionality * * Copyright 2013-2016 Freescale Semiconductor, Inc. 
- * Copyright 2016-2017, 2019 NXP + * Copyright 2016-2017, 2019-2020 NXP */ #include @@ -124,8 +124,10 @@ int caam_qi_enqueue(struct device *qidev, struct caam_drv_req *req) do { ret = qman_enqueue(req->drv_ctx->req_fq, &fd); - if (likely(!ret)) + if (likely(!ret)) { + refcount_inc(&req->drv_ctx->refcnt); return 0; + } if (ret != -EBUSY) break; @@ -148,11 +150,6 @@ static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq, fd = &msg->ern.fd; - if (qm_fd_get_format(fd) != qm_fd_compound) { - dev_err(qidev, "Non-compound FD from CAAM\n"); - return; - } - drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd)); if (!drv_req) { dev_err(qidev, @@ -160,6 +157,13 @@ static void caam_fq_ern_cb(struct qman_portal *qm, struct qman_fq *fq, return; } + refcount_dec(&drv_req->drv_ctx->refcnt); + + if (qm_fd_get_format(fd) != qm_fd_compound) { + dev_err(qidev, "Non-compound FD from CAAM\n"); + return; + } + dma_unmap_single(drv_req->drv_ctx->qidev, qm_fd_addr(fd), sizeof(drv_req->fd_sgt), DMA_BIDIRECTIONAL); @@ -287,9 +291,10 @@ empty_fq: return ret; } -static int empty_caam_fq(struct qman_fq *fq) +static int empty_caam_fq(struct qman_fq *fq, struct caam_drv_ctx *drv_ctx) { int ret; + int retries = 10; struct qm_mcr_queryfq_np np; /* Wait till the older CAAM FQ get empty */ @@ -304,11 +309,18 @@ static int empty_caam_fq(struct qman_fq *fq) msleep(20); } while (1); - /* - * Give extra time for pending jobs from this FQ in holding tanks - * to get processed - */ - msleep(20); + /* Wait until pending jobs from this FQ are processed by CAAM */ + do { + if (refcount_read(&drv_ctx->refcnt) == 1) + break; + + msleep(20); + } while (--retries); + + if (!retries) + dev_warn_once(drv_ctx->qidev, "%d frames from FQID %u still pending in CAAM\n", + refcount_read(&drv_ctx->refcnt), fq->fqid); + return 0; } @@ -340,7 +352,7 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc) drv_ctx->req_fq = new_fq; /* Empty and remove the older FQ */ - ret = empty_caam_fq(old_fq); + ret = empty_caam_fq(old_fq, drv_ctx); if (ret) { dev_err(qidev, "Old CAAM FQ empty failed: %d\n", ret); @@ -453,6 +465,9 @@ struct caam_drv_ctx *caam_drv_ctx_init(struct device *qidev, return ERR_PTR(-ENOMEM); } + /* init reference counter used to track references to request FQ */ + refcount_set(&drv_ctx->refcnt, 1); + drv_ctx->qidev = qidev; return drv_ctx; } @@ -571,6 +586,16 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p, return qman_cb_dqrr_stop; fd = &dqrr->fd; + + drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd)); + if (unlikely(!drv_req)) { + dev_err(qidev, + "Can't find original request for caam response\n"); + return qman_cb_dqrr_consume; + } + + refcount_dec(&drv_req->drv_ctx->refcnt); + status = be32_to_cpu(fd->status); if (unlikely(status)) { u32 ssrc = status & JRSTA_SSRC_MASK; @@ -588,13 +613,6 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p, return qman_cb_dqrr_consume; } - drv_req = caam_iova_to_virt(priv->domain, qm_fd_addr_get64(fd)); - if (unlikely(!drv_req)) { - dev_err(qidev, - "Can't find original request for caam response\n"); - return qman_cb_dqrr_consume; - } - dma_unmap_single(drv_req->drv_ctx->qidev, qm_fd_addr(fd), sizeof(drv_req->fd_sgt), DMA_BIDIRECTIONAL); diff --git a/drivers/crypto/caam/qi.h b/drivers/crypto/caam/qi.h index 848958951f68..5894f16f8fe3 100644 --- a/drivers/crypto/caam/qi.h +++ b/drivers/crypto/caam/qi.h @@ -3,7 +3,7 @@ * Public definitions for the CAAM/QI (Queue Interface) backend. 
* * Copyright 2013-2016 Freescale Semiconductor, Inc. - * Copyright 2016-2017 NXP + * Copyright 2016-2017, 2020 NXP */ #ifndef __QI_H__ @@ -52,6 +52,7 @@ enum optype { * @context_a: shared descriptor dma address * @req_fq: to-CAAM request frame queue * @rsp_fq: from-CAAM response frame queue + * @refcnt: reference counter incremented for each frame enqueued in to-CAAM FQ * @cpu: cpu on which to receive CAAM response * @op_type: operation type * @qidev: device pointer for CAAM/QI backend @@ -62,6 +63,7 @@ struct caam_drv_ctx { dma_addr_t context_a; struct qman_fq *req_fq; struct qman_fq *rsp_fq; + refcount_t refcnt; int cpu; enum optype op_type; struct device *qidev; From 8962c6d2c2b8ca51b0f188109015b15fc5f4da44 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Sun, 2 Feb 2020 18:19:14 +0200 Subject: [PATCH 012/162] crypto: ccree - dec auth tag size from cryptlen map Remove the auth tag size from cryptlen before mapping the destination in out-of-place AEAD decryption thus resolving a crash with extended testmgr tests. Signed-off-by: Gilad Ben-Yossef Reported-by: Geert Uytterhoeven Cc: stable@vger.kernel.org # v4.19+ Tested-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_buffer_mgr.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index 885347b5b372..954f14bddf1d 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -894,8 +894,12 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, if (req->src != req->dst) { size_for_map = areq_ctx->assoclen + req->cryptlen; - size_for_map += (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) ? - authsize : 0; + + if (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) + size_for_map += authsize; + else + size_for_map -= authsize; + if (is_gcm4543) size_for_map += crypto_aead_ivsize(tfm); From 20f513091caf4015cf2f40319cbe38a88234d097 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 3 Feb 2020 15:39:21 +0000 Subject: [PATCH 013/162] crypto: ccree - remove set but not used variable 'du_size' Fixes gcc '-Wunused-but-set-variable' warning: drivers/crypto/ccree/cc_cipher.c: In function 'cc_setup_state_desc': drivers/crypto/ccree/cc_cipher.c:536:15: warning: variable 'du_size' set but not used [-Wunused-but-set-variable] commit 5c83e8ec4d51 ("crypto: ccree - fix FDE descriptor sequence") involved this unused variable, so remove it. 
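A minimal stand-alone reproduction of the warning class being silenced here (hypothetical code, for illustration only):

	/* du_size is assigned twice but never read, so gcc warns:
	 * "variable 'du_size' set but not used [-Wunused-but-set-variable]"
	 */
	static void demo(unsigned int nbytes, unsigned int data_unit)
	{
		unsigned int du_size = nbytes;

		if (data_unit)
			du_size = data_unit;
	}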
Reported-by: Hulk Robot
Signed-off-by: YueHaibing
Signed-off-by: Herbert Xu
---
 drivers/crypto/ccree/cc_cipher.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c
index fc1646ccfe29..2679eb4a2418 100644
--- a/drivers/crypto/ccree/cc_cipher.c
+++ b/drivers/crypto/ccree/cc_cipher.c
@@ -533,14 +533,6 @@ static void cc_setup_state_desc(struct crypto_tfm *tfm,
 	int flow_mode = ctx_p->flow_mode;
 	int direction = req_ctx->gen_ctx.op_type;
 	dma_addr_t iv_dma_addr = req_ctx->gen_ctx.iv_dma_addr;
-	unsigned int du_size = nbytes;
-
-	struct cc_crypto_alg *cc_alg =
-		container_of(tfm->__crt_alg, struct cc_crypto_alg,
-			     skcipher_alg.base);
-
-	if (cc_alg->data_unit)
-		du_size = cc_alg->data_unit;

 	switch (cipher_mode) {
 	case DRV_CIPHER_ECB:

From 4fb3d8ba2824db0b8553602daef82fe27f50bba2 Mon Sep 17 00:00:00 2001
From: Matteo Croce
Date: Tue, 4 Feb 2020 00:39:33 +0100
Subject: [PATCH 014/162] crypto: arm64/poly1305 - ignore build files

Add arch/arm64/crypto/poly1305-core.S to .gitignore as it's built from
poly1305-core.S_shipped

Fixes: f569ca164751 ("crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
Signed-off-by: Matteo Croce
Signed-off-by: Herbert Xu
---
 arch/arm64/crypto/.gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/crypto/.gitignore b/arch/arm64/crypto/.gitignore
index 879df8781ed5..e403b1343328 100644
--- a/arch/arm64/crypto/.gitignore
+++ b/arch/arm64/crypto/.gitignore
@@ -1,2 +1,3 @@
 sha256-core.S
 sha512-core.S
+poly1305-core.S

From 1c502e2e2d79b6a4c800c3806c70b58ad6ae784d Mon Sep 17 00:00:00 2001
From: Ayush Sawal
Date: Wed, 5 Feb 2020 10:48:41 +0530
Subject: [PATCH 015/162] crypto: chelsio - Fix the libkcapi cbc(aes) aio
 failing test cases

The failing libkcapi "cbc(aes)" tests are the symmetric asynchronous
cipher one shot multiple test, the symmetric asynchronous cipher stream
multiple test, and the symmetric asynchronous cipher vmsplice multiple
test.

In this patch a wait_for_completion is added in the chcr_aes_encrypt
function, which completes when the response comes back from the
hardware. This serializes encryption in the cbc(aes) aio case.
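Sketched generically (names follow the patch), the pattern is the classic completion handshake: the submit path blocks until the response path signals, so at most one such request per context is in flight at a time:

	/* Once, at tfm init time: */
	init_completion(&ctx->cbc_aes_aio_done);

	/* Submit side, after handing the work request to the hardware: */
	chcr_send_wr(skb);
	wait_for_completion(&ctx->cbc_aes_aio_done);

	/* Response (or error) path, when the hardware answers: */
	complete(&ctx->cbc_aes_aio_done);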
Signed-off-by: Ayush Sawal Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_algo.c | 20 +++++++++++++++++++- drivers/crypto/chelsio/chcr_crypto.h | 1 + 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index b4b9b22125d1..699e3053895a 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -1102,6 +1102,7 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req, unsigned char *input, int err) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chcr_context *ctx = c_ctx(tfm); struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm)); struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(tfm)); struct sk_buff *skb; @@ -1166,10 +1167,20 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req, chcr_send_wr(skb); reqctx->last_req_len = bytes; reqctx->processed += bytes; + if (get_cryptoalg_subtype(tfm) == + CRYPTO_ALG_SUB_TYPE_CBC && req->base.flags == + CRYPTO_TFM_REQ_MAY_SLEEP ) { + complete(&ctx->cbc_aes_aio_done); + } return 0; unmap: chcr_cipher_dma_unmap(&ULD_CTX(c_ctx(tfm))->lldi.pdev->dev, req); complete: + if (get_cryptoalg_subtype(tfm) == + CRYPTO_ALG_SUB_TYPE_CBC && req->base.flags == + CRYPTO_TFM_REQ_MAY_SLEEP ) { + complete(&ctx->cbc_aes_aio_done); + } chcr_dec_wrcount(dev); req->base.complete(&req->base, err); return err; @@ -1289,6 +1300,7 @@ error: static int chcr_aes_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chcr_context *ctx; struct chcr_dev *dev = c_ctx(tfm)->dev; struct sk_buff *skb = NULL; int err, isfull = 0; @@ -1313,6 +1325,12 @@ static int chcr_aes_encrypt(struct skcipher_request *req) skb->dev = u_ctx->lldi.ports[0]; set_wr_txq(skb, CPL_PRIORITY_DATA, c_ctx(tfm)->tx_qidx); chcr_send_wr(skb); + if (get_cryptoalg_subtype(tfm) == + CRYPTO_ALG_SUB_TYPE_CBC && req->base.flags == + CRYPTO_TFM_REQ_MAY_SLEEP ) { + ctx=c_ctx(tfm); + wait_for_completion(&ctx->cbc_aes_aio_done); + } return isfull ? -EBUSY : -EINPROGRESS; error: chcr_dec_wrcount(dev); @@ -1401,7 +1419,7 @@ static int chcr_init_tfm(struct crypto_skcipher *tfm) pr_err("failed to allocate fallback for %s\n", alg->base.cra_name); return PTR_ERR(ablkctx->sw_cipher); } - + init_completion(&ctx->cbc_aes_aio_done); crypto_skcipher_set_reqsize(tfm, sizeof(struct chcr_skcipher_req_ctx)); return chcr_device_init(ctx); diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h index 6db2df8c8a05..9207d88c5538 100644 --- a/drivers/crypto/chelsio/chcr_crypto.h +++ b/drivers/crypto/chelsio/chcr_crypto.h @@ -254,6 +254,7 @@ struct chcr_context { unsigned char rx_qidx; unsigned char tx_chan_id; unsigned char pci_chan_id; + struct completion cbc_aes_aio_done; struct __crypto_ctx crypto_ctx[0]; }; From 9195189e00a7db55e7d448cee973cae87c5a3c71 Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Wed, 5 Feb 2020 10:48:42 +0530 Subject: [PATCH 016/162] crypto: chelsio - This fixes the kernel panic which occurs during a libkcapi test The libkcapi test which causes kernel panic is aead asynchronous vmsplice multiple test. 
./bin/kcapi -v -d 4 -x 10 -c "ccm(aes)"
-q 4edb58e8d5eb6bc711c43a6f3693daebde2e5524f1b55297abb29f003236e43d
-t a7877c99 -n 674742abd0f5ba -k 2861fd0253705d7875c95ba8a53171b4
-a fb7bc304a3909e66e2e0c5ef952712dd884ce3e7324171369f2c5db1adc48c7d

This patch avoids DMA-mapping a zero-length sg, which causes the panic,
by using sg_nents_for_len(), which limits the mapping to the entries
covering a specific length.

Signed-off-by: Ayush Sawal
Signed-off-by: Herbert Xu
---
 drivers/crypto/chelsio/chcr_algo.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 699e3053895a..02b0ddb785a4 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -2489,8 +2489,9 @@ int chcr_aead_dma_map(struct device *dev,
 	else
 		reqctx->b0_dma = 0;
 	if (req->src == req->dst) {
-		error = dma_map_sg(dev, req->src, sg_nents(req->src),
-				   DMA_BIDIRECTIONAL);
+		error = dma_map_sg(dev, req->src,
+				   sg_nents_for_len(req->src, dst_size),
+				   DMA_BIDIRECTIONAL);
 		if (!error)
 			goto err;
 	} else {

From 7f1cfe41cc298b4087668554fa0b91cef6094945 Mon Sep 17 00:00:00 2001
From: Tianjia Zhang
Date: Wed, 5 Feb 2020 15:25:23 +0800
Subject: [PATCH 017/162] crypto: proc - simplify the c_show function

The path with the CRYPTO_ALG_LARVAL flag has already jumped to the end
before the switch statement, so there is no need to include
CRYPTO_ALG_LARVAL in the switch mask.

Signed-off-by: Tianjia Zhang
Signed-off-by: Herbert Xu
---
 crypto/proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crypto/proc.c b/crypto/proc.c
index 7b91557adccb..08d8c2bc7e62 100644
--- a/crypto/proc.c
+++ b/crypto/proc.c
@@ -60,7 +60,7 @@ static int c_show(struct seq_file *m, void *p)
 		goto out;
 	}

-	switch (alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL)) {
+	switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) {
 	case CRYPTO_ALG_TYPE_CIPHER:
 		seq_printf(m, "type : cipher\n");
 		seq_printf(m, "blocksize : %u\n", alg->cra_blocksize);

From 8e3b7fd7ea554ccb1bdc596bfbcdaf56f7ab017c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?=
Date: Wed, 5 Feb 2020 12:19:58 +0200
Subject: [PATCH 018/162] crypto: tcrypt - fix printed skcipher [a]sync mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When running tcrypt skcipher speed tests, logs contain things like:
testing speed of async ecb(des3_ede) (ecb(des3_ede-generic)) encryption
or:
testing speed of async ecb(aes) (ecb(aes-ce)) encryption

The algorithm implementations are sync, not async. Fix this inaccuracy.

Fixes: 7166e589da5b6 ("crypto: tcrypt - Use skcipher")
Signed-off-by: Horia Geantă
Signed-off-by: Herbert Xu
---
 crypto/tcrypt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index f42f486e90e8..ba0b7702f2e9 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1514,8 +1514,8 @@ static void test_skcipher_speed(const char *algo, int enc, unsigned int secs,
 		return;
 	}

-	pr_info("\ntesting speed of async %s (%s) %s\n", algo,
-		get_driver_name(crypto_skcipher, tfm), e);
+	pr_info("\ntesting speed of %s %s (%s) %s\n", async ? "async" : "sync",
+		algo, get_driver_name(crypto_skcipher, tfm), e);

 	req = skcipher_request_alloc(tfm, GFP_KERNEL);
 	if (!req) {

From 00adbe714b28043e8051dfd6d709541c52f5aa45 Mon Sep 17 00:00:00 2001
From: Devulapally Shiva Krishna
Date: Wed, 5 Feb 2020 16:04:33 +0530
Subject: [PATCH 019/162] crypto: chelsio - Print the chcr driver information
 on module load

No logs are recorded in dmesg during chcr module load, hence add the
print, and also append -ko to the driver version.
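Since the uld add hook runs once per adapter, the one-shot print variant keeps multi-adapter systems from repeating the banner; a minimal sketch of the pattern:

	/* Logs on the first call only; later calls are silently dropped. */
	pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION);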
Signed-off-by: Devulapally Shiva Krishna Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_core.c | 1 + drivers/crypto/chelsio/chcr_core.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c index e937605670ac..507ba20f0874 100644 --- a/drivers/crypto/chelsio/chcr_core.c +++ b/drivers/crypto/chelsio/chcr_core.c @@ -190,6 +190,7 @@ static void *chcr_uld_add(const struct cxgb4_lld_info *lld) struct uld_ctx *u_ctx; /* Create the device and add it in the device list */ + pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION); if (!(lld->ulp_crypto & ULP_CRYPTO_LOOKASIDE)) return ERR_PTR(-EOPNOTSUPP); diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h index ad874d548aa5..b41ef1abfe74 100644 --- a/drivers/crypto/chelsio/chcr_core.h +++ b/drivers/crypto/chelsio/chcr_core.h @@ -43,7 +43,8 @@ #include "cxgb4_uld.h" #define DRV_MODULE_NAME "chcr" -#define DRV_VERSION "1.0.0.0" +#define DRV_VERSION "1.0.0.0-ko" +#define DRV_DESC "Chelsio T6 Crypto Co-processor Driver" #define MAX_PENDING_REQ_TO_HW 20 #define CHCR_TEST_RESPONSE_TIMEOUT 1000 From 9ce9a5d5c362e392791319edef4ef918ed0ddfca Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Wed, 5 Feb 2020 22:01:30 +0800 Subject: [PATCH 020/162] crypto: allwinner - remove redundant platform_get_irq error message Function dev_err() after platform_get_irq() is redundant because platform_get_irq() already prints an error. Signed-off-by: Chen Zhou Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c index f72346a44e69..3e4e4bbda34c 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c @@ -565,10 +565,8 @@ static int sun8i_ce_probe(struct platform_device *pdev) /* Get Non Secure IRQ */ irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(ce->dev, "Cannot get CryptoEngine Non-secure IRQ\n"); + if (irq < 0) return irq; - } ce->reset = devm_reset_control_get(&pdev->dev, NULL); if (IS_ERR(ce->reset)) { From d6364b8128439a8c0e381f80c38667de9f15eef8 Mon Sep 17 00:00:00 2001 From: Eneas U de Queiroz Date: Fri, 7 Feb 2020 12:02:25 -0300 Subject: [PATCH 021/162] crypto: qce - use cryptlen when adding extra sgl The qce crypto driver appends an extra entry to the dst sgl, to maintain private state information. When the gcm driver sends requests to the ctr skcipher, it passes the authentication tag after the actual crypto payload, but it must not be touched. Commit 1336c2221bee ("crypto: qce - save a sg table slot for result buf") limited the destination sgl to avoid overwriting the authentication tag but it assumed the tag would be in a separate sgl entry. This is not always the case, so it is better to limit the length of the destination buffer to req->cryptlen before appending the result buf. 
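The core of the change, restated stand-alone: each copied entry is clamped so the running total never exceeds max_len (req->cryptlen for the data sgl), leaving a trailing auth tag untouched even when it shares the final sg entry with payload:

	/* Length-limited sgl copy (sketch of the new loop body). */
	while (new_sgl && sg && max_len) {
		new_len = new_sgl->length > max_len ? max_len : new_sgl->length;
		sg_set_page(sg, sg_page(new_sgl), new_len, new_sgl->offset);
		sg_last = sg;
		sg = sg_next(sg);
		new_sgl = sg_next(new_sgl);
		max_len -= new_len;
	}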
Signed-off-by: Eneas U de Queiroz Signed-off-by: Herbert Xu --- drivers/crypto/qce/dma.c | 11 ++++++----- drivers/crypto/qce/dma.h | 2 +- drivers/crypto/qce/skcipher.c | 5 +++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/qce/dma.c b/drivers/crypto/qce/dma.c index 7da893dc00e7..46db5bf366b4 100644 --- a/drivers/crypto/qce/dma.c +++ b/drivers/crypto/qce/dma.c @@ -48,9 +48,10 @@ void qce_dma_release(struct qce_dma_data *dma) struct scatterlist * qce_sgtable_add(struct sg_table *sgt, struct scatterlist *new_sgl, - int max_ents) + unsigned int max_len) { struct scatterlist *sg = sgt->sgl, *sg_last = NULL; + unsigned int new_len; while (sg) { if (!sg_page(sg)) @@ -61,13 +62,13 @@ qce_sgtable_add(struct sg_table *sgt, struct scatterlist *new_sgl, if (!sg) return ERR_PTR(-EINVAL); - while (new_sgl && sg && max_ents) { - sg_set_page(sg, sg_page(new_sgl), new_sgl->length, - new_sgl->offset); + while (new_sgl && sg && max_len) { + new_len = new_sgl->length > max_len ? max_len : new_sgl->length; + sg_set_page(sg, sg_page(new_sgl), new_len, new_sgl->offset); sg_last = sg; sg = sg_next(sg); new_sgl = sg_next(new_sgl); - max_ents--; + max_len -= new_len; } return sg_last; diff --git a/drivers/crypto/qce/dma.h b/drivers/crypto/qce/dma.h index ed25a0d9829e..786402169360 100644 --- a/drivers/crypto/qce/dma.h +++ b/drivers/crypto/qce/dma.h @@ -43,6 +43,6 @@ void qce_dma_issue_pending(struct qce_dma_data *dma); int qce_dma_terminate_all(struct qce_dma_data *dma); struct scatterlist * qce_sgtable_add(struct sg_table *sgt, struct scatterlist *sg_add, - int max_ents); + unsigned int max_len); #endif /* _DMA_H_ */ diff --git a/drivers/crypto/qce/skcipher.c b/drivers/crypto/qce/skcipher.c index 4217b745f124..63ae75809cb7 100644 --- a/drivers/crypto/qce/skcipher.c +++ b/drivers/crypto/qce/skcipher.c @@ -97,13 +97,14 @@ qce_skcipher_async_req_handle(struct crypto_async_request *async_req) sg_init_one(&rctx->result_sg, qce->dma.result_buf, QCE_RESULT_BUF_SZ); - sg = qce_sgtable_add(&rctx->dst_tbl, req->dst, rctx->dst_nents - 1); + sg = qce_sgtable_add(&rctx->dst_tbl, req->dst, req->cryptlen); if (IS_ERR(sg)) { ret = PTR_ERR(sg); goto error_free; } - sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->result_sg, 1); + sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->result_sg, + QCE_RESULT_BUF_SZ); if (IS_ERR(sg)) { ret = PTR_ERR(sg); goto error_free; From ce163ba0bf298f1707321ac025ef639f88e62801 Mon Sep 17 00:00:00 2001 From: Eneas U de Queiroz Date: Fri, 7 Feb 2020 12:02:26 -0300 Subject: [PATCH 022/162] crypto: qce - use AES fallback for small requests Process small blocks using the fallback cipher, as a workaround for an observed failure (DMA-related, apparently) when computing the GCM ghash key. This brings a speed gain as well, since it avoids the latency of using the hardware engine to process small blocks. Using software for all 16-byte requests would be enough to make GCM work, but to increase performance, a larger threshold would be better. Measuring the performance of supported ciphers with openssl speed, software matches hardware at around 768-1024 bytes. Considering the 256-bit ciphers, software is 2-3 times faster than qce at 256-bytes, 30% faster at 512, and about even at 768-bytes. With 128-bit keys, the break-even point would be around 1024-bytes. This adds the 'aes_sw_max_len' parameter, to set the largest request length processed by the software fallback. Its default is being set to 512 bytes, a little lower than the break-even point, to balance the cost in CPU usage. 
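The resulting dispatch rule, sketched (the condition mirrors the driver change; the XTS clause arrives in the next patch):

	/* Fall back to software for key sizes the engine cannot handle
	 * (e.g. 192-bit AES) and for requests at or below the cutoff. */
	bool use_fallback = IS_AES(rctx->flags) &&
			    ((keylen != AES_KEYSIZE_128 &&
			      keylen != AES_KEYSIZE_256) ||
			     req->cryptlen <= aes_sw_max_len);

Because the parameter is registered with mode 0644, the cutoff can also be adjusted at runtime through /sys/module/<module>/parameters/aes_sw_max_len (the module name depends on how the driver is built).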
Signed-off-by: Eneas U de Queiroz
Signed-off-by: Herbert Xu
---
 drivers/crypto/Kconfig | 23 +++++++++++++++++++++++
 drivers/crypto/qce/skcipher.c | 20 ++++++++++++--------
 2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index c2767ed54dfe..052d3ff7fb20 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -685,6 +685,29 @@ choice

 endchoice

+config CRYPTO_DEV_QCE_SW_MAX_LEN
+	int "Default maximum request size to use software for AES"
+	depends on CRYPTO_DEV_QCE && CRYPTO_DEV_QCE_SKCIPHER
+	default 512
+	help
+	  This sets the default maximum request size to perform AES requests
+	  using software instead of the crypto engine. It can be changed by
+	  setting the aes_sw_max_len parameter.
+
+	  Small blocks are processed faster in software than hardware.
+	  Considering the 256-bit ciphers, software is 2-3 times faster than
+	  qce at 256-bytes, 30% faster at 512, and about even at 768-bytes.
+	  With 128-bit keys, the break-even point would be around 1024-bytes.
+
+	  The default is set a little lower, to 512 bytes, to balance the
+	  cost in CPU usage. The minimum recommended setting is 16-bytes
+	  (1 AES block), since AES-GCM will fail if you set it lower.
+	  Setting this to zero will send all requests to the hardware.
+
+	  Note that 192-bit keys are not supported by the hardware and are
+	  always processed by the software fallback, and all DES requests
+	  are done by the hardware.
+
 config CRYPTO_DEV_QCOM_RNG
 	tristate "Qualcomm Random Number Generator Driver"
 	depends on ARCH_QCOM || COMPILE_TEST

diff --git a/drivers/crypto/qce/skcipher.c b/drivers/crypto/qce/skcipher.c
index 63ae75809cb7..fc7c940b5a43 100644
--- a/drivers/crypto/qce/skcipher.c
+++ b/drivers/crypto/qce/skcipher.c
@@ -5,6 +5,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -12,6 +13,13 @@
 #include "cipher.h"

+static unsigned int aes_sw_max_len = CONFIG_CRYPTO_DEV_QCE_SW_MAX_LEN;
+module_param(aes_sw_max_len, uint, 0644);
+MODULE_PARM_DESC(aes_sw_max_len,
+		 "Only use hardware for AES requests larger than this "
+		 "[0=always use hardware; anything <16 breaks AES-GCM; default="
+		 __stringify(CONFIG_CRYPTO_DEV_QCE_SW_MAX_LEN)"]");
+
 static LIST_HEAD(skcipher_algs);

 static void qce_skcipher_done(void *data)
@@ -166,15 +174,10 @@ static int qce_skcipher_setkey(struct crypto_skcipher *ablk, const u8 *key,
 	switch (IS_XTS(flags) ? keylen >> 1 : keylen) {
 	case AES_KEYSIZE_128:
 	case AES_KEYSIZE_256:
+		memcpy(ctx->enc_key, key, keylen);
 		break;
-	default:
-		goto fallback;
 	}

-	ctx->enc_keylen = keylen;
-	memcpy(ctx->enc_key, key, keylen);
-	return 0;
-fallback:
 	ret = crypto_sync_skcipher_setkey(ctx->fallback, key, keylen);
 	if (!ret)
 		ctx->enc_keylen = keylen;
@@ -224,8 +227,9 @@ static int qce_skcipher_crypt(struct skcipher_request *req, int encrypt)
 	rctx->flags |= encrypt ? QCE_ENCRYPT : QCE_DECRYPT;
 	keylen = IS_XTS(rctx->flags) ?
ctx->enc_keylen >> 1 : ctx->enc_keylen; - if (IS_AES(rctx->flags) && keylen != AES_KEYSIZE_128 && - keylen != AES_KEYSIZE_256) { + if (IS_AES(rctx->flags) && + ((keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_256) || + req->cryptlen <= aes_sw_max_len)) { SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); skcipher_request_set_sync_tfm(subreq, ctx->fallback); From 7f19380b2cfd412dcef2facefb3f6c62788864d7 Mon Sep 17 00:00:00 2001 From: Eneas U de Queiroz Date: Fri, 7 Feb 2020 12:02:27 -0300 Subject: [PATCH 023/162] crypto: qce - handle AES-XTS cases that qce fails QCE hangs when presented with an AES-XTS request whose length is larger than QCE_SECTOR_SIZE (512-bytes), and is not a multiple of it. Let the fallback cipher handle them. Signed-off-by: Eneas U de Queiroz Signed-off-by: Herbert Xu --- drivers/crypto/qce/common.c | 2 -- drivers/crypto/qce/common.h | 3 +++ drivers/crypto/qce/skcipher.c | 9 +++++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/qce/common.c b/drivers/crypto/qce/common.c index 629e7f34dc09..5006e74c40cd 100644 --- a/drivers/crypto/qce/common.c +++ b/drivers/crypto/qce/common.c @@ -15,8 +15,6 @@ #include "regs-v5.h" #include "sha.h" -#define QCE_SECTOR_SIZE 512 - static inline u32 qce_read(struct qce_device *qce, u32 offset) { return readl(qce->base + offset); diff --git a/drivers/crypto/qce/common.h b/drivers/crypto/qce/common.h index 282d4317470d..9f989cba0f1b 100644 --- a/drivers/crypto/qce/common.h +++ b/drivers/crypto/qce/common.h @@ -12,6 +12,9 @@ #include #include +/* xts du size */ +#define QCE_SECTOR_SIZE 512 + /* key size in bytes */ #define QCE_SHA_HMAC_KEY_SIZE 64 #define QCE_MAX_CIPHER_KEY_SIZE AES_KEYSIZE_256 diff --git a/drivers/crypto/qce/skcipher.c b/drivers/crypto/qce/skcipher.c index fc7c940b5a43..a4f6ec1b64c7 100644 --- a/drivers/crypto/qce/skcipher.c +++ b/drivers/crypto/qce/skcipher.c @@ -227,9 +227,14 @@ static int qce_skcipher_crypt(struct skcipher_request *req, int encrypt) rctx->flags |= encrypt ? QCE_ENCRYPT : QCE_DECRYPT; keylen = IS_XTS(rctx->flags) ? ctx->enc_keylen >> 1 : ctx->enc_keylen; + /* qce is hanging when AES-XTS request len > QCE_SECTOR_SIZE and + * is not a multiple of it; pass such requests to the fallback + */ if (IS_AES(rctx->flags) && - ((keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_256) || - req->cryptlen <= aes_sw_max_len)) { + (((keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_256) || + req->cryptlen <= aes_sw_max_len) || + (IS_XTS(rctx->flags) && req->cryptlen > QCE_SECTOR_SIZE && + req->cryptlen % QCE_SECTOR_SIZE))) { SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); skcipher_request_set_sync_tfm(subreq, ctx->fallback); From f426faac80c9cca7973de10d903dc772627a5f5a Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Fri, 24 Jan 2020 23:20:51 +0100 Subject: [PATCH 024/162] crypto: chelsio - remove extra allocation for chtls_dev chtls_uld_add allocates room for info->nports net_device structs following the chtls_dev struct, presumably because it was originally intended that the ports array would be stored there. This is suggested by the assignment which was present in initial versions and removed by c4e848586cf1 ("crypto: chelsio - remove redundant assignment to cdev->ports"): cdev->ports = (struct net_device **)(cdev + 1); This assignment was never used, being overwritten by lldi->ports immediately afterwards, and I couldn't find any uses of the memory allocated past the end of the struct. 
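For reference, the removed pattern looked like this (the pointer-array wiring only existed in early revisions of the driver):

	/* Trailing space for nports pointers was reserved... */
	cdev = kzalloc(sizeof(*cdev) +
		       info->nports * sizeof(struct net_device *), GFP_KERNEL);
	/* ...and once pointed into, but immediately overwritten: */
	cdev->ports = (struct net_device **)(cdev + 1);
	cdev->ports = lldi->ports;

Since nothing ever used the trailing memory, a plain kzalloc(sizeof(*cdev), GFP_KERNEL) suffices.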
Signed-off-by: Stephen Kitt Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chtls/chtls_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c index a038de90b2ea..9f06abd340cc 100644 --- a/drivers/crypto/chelsio/chtls/chtls_main.c +++ b/drivers/crypto/chelsio/chtls/chtls_main.c @@ -229,8 +229,7 @@ static void *chtls_uld_add(const struct cxgb4_lld_info *info) struct chtls_dev *cdev; int i, j; - cdev = kzalloc(sizeof(*cdev) + info->nports * - (sizeof(struct net_device *)), GFP_KERNEL); + cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); if (!cdev) goto out; From fcdba3c33a4dfdc3dc3f2cce46aaf0d6fc1df6c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 5 Feb 2020 15:00:02 +0100 Subject: [PATCH 025/162] hwrng: imx-rngc - improve dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The imx-rngc driver binds to devices that are compatible to "fsl,imx25-rngb". Grepping through the device tree sources suggests this only exists on i.MX25. So restrict dependencies to configs that have this SoC enabled, but allow compile testing. For the latter additional dependencies for clk and readl/writel are necessary. Signed-off-by: Uwe Kleine-König Signed-off-by: Herbert Xu --- drivers/char/hw_random/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 914e293ba62b..ddbfa190caf4 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -244,7 +244,8 @@ config HW_RANDOM_MXC_RNGA config HW_RANDOM_IMX_RNGC tristate "Freescale i.MX RNGC Random Number Generator" - depends on ARCH_MXC + depends on HAS_IOMEM && HAVE_CLK + depends on SOC_IMX25 || COMPILE_TEST default HW_RANDOM ---help--- This driver provides kernel-side support for the Random Number From 41ccdbfd5427bbbf3ed58b16750113b38fad1780 Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Mon, 10 Feb 2020 13:11:00 -0500 Subject: [PATCH 026/162] padata: fix uninitialized return value in padata_replace() According to Geert's report[0], kernel/padata.c: warning: 'err' may be used uninitialized in this function [-Wuninitialized]: => 539:2 Warning is seen only with older compilers on certain archs. The runtime effect is potentially returning garbage down the stack when padata's cpumasks are modified before any pcrypt requests have run. Simplest fix is to initialize err to the success value. 
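The bug class in minimal form (hypothetical sketch; replace_one stands in for padata_replace_one): if the loop body never runs, err is returned uninitialized, and initializing it to the success value covers the empty-list case:

	static int replace_all(struct list_head *head)
	{
		struct padata_shell *ps;
		int err = 0;	/* the fix: previously just "int err;" */

		list_for_each_entry(ps, head, list) {
			err = replace_one(ps);	/* may never execute */
			if (err)
				break;
		}
		return err;	/* garbage without the initializer */
	}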
[0] http://lkml.kernel.org/r/20200210135506.11536-1-geert@linux-m68k.org Reported-by: Geert Uytterhoeven Fixes: bbefa1dd6a6d ("crypto: pcrypt - Avoid deadlock by using per-instance padata queues") Signed-off-by: Daniel Jordan Cc: Herbert Xu Cc: Steffen Klassert Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Herbert Xu --- kernel/padata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/padata.c b/kernel/padata.c index 72777c10bb9c..62082597d4a2 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -512,7 +512,7 @@ static int padata_replace_one(struct padata_shell *ps) static int padata_replace(struct padata_instance *pinst) { struct padata_shell *ps; - int err; + int err = 0; pinst->flags |= PADATA_RESET; From aa017ab97a223d55f4955e2864daf7a05ba1f8a2 Mon Sep 17 00:00:00 2001 From: Kenneth Lee Date: Tue, 11 Feb 2020 15:54:22 +0800 Subject: [PATCH 027/162] uacce: Add documents for uacce Uacce (Unified/User-space-access-intended Accelerator Framework) is a kernel module targets to provide Shared Virtual Addressing (SVA) between the accelerator and process. This patch add document to explain how it works. Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jonathan Cameron Signed-off-by: Kenneth Lee Signed-off-by: Zaibo Xu Signed-off-by: Zhou Wang Signed-off-by: Zhangfei Gao Signed-off-by: Herbert Xu --- Documentation/misc-devices/uacce.rst | 176 +++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 Documentation/misc-devices/uacce.rst diff --git a/Documentation/misc-devices/uacce.rst b/Documentation/misc-devices/uacce.rst new file mode 100644 index 000000000000..1db412e9b1a3 --- /dev/null +++ b/Documentation/misc-devices/uacce.rst @@ -0,0 +1,176 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Introduction of Uacce +--------------------- + +Uacce (Unified/User-space-access-intended Accelerator Framework) targets to +provide Shared Virtual Addressing (SVA) between accelerators and processes. +So accelerator can access any data structure of the main cpu. +This differs from the data sharing between cpu and io device, which share +only data content rather than address. +Because of the unified address, hardware and user space of process can +share the same virtual address in the communication. +Uacce takes the hardware accelerator as a heterogeneous processor, while +IOMMU share the same CPU page tables and as a result the same translation +from va to pa. + +:: + + __________________________ __________________________ + | | | | + | User application (CPU) | | Hardware Accelerator | + |__________________________| |__________________________| + + | | + | va | va + V V + __________ __________ + | | | | + | MMU | | IOMMU | + |__________| |__________| + | | + | | + V pa V pa + _______________________________________ + | | + | Memory | + |_______________________________________| + + + +Architecture +------------ + +Uacce is the kernel module, taking charge of iommu and address sharing. +The user drivers and libraries are called WarpDrive. + +The uacce device, built around the IOMMU SVA API, can access multiple +address spaces, including the one without PASID. + +A virtual concept, queue, is used for the communication. It provides a +FIFO-like interface. And it maintains a unified address space between the +application and all involved hardware. 
+ +:: + + ___________________ ________________ + | | user API | | + | WarpDrive library | ------------> | user driver | + |___________________| |________________| + | | + | | + | queue fd | + | | + | | + v | + ___________________ _________ | + | | | | | mmap memory + | Other framework | | uacce | | r/w interface + | crypto/nic/others | |_________| | + |___________________| | + | | | + | register | register | + | | | + | | | + | _________________ __________ | + | | | | | | + ------------- | Device Driver | | IOMMU | | + |_________________| |__________| | + | | + | V + | ___________________ + | | | + -------------------------- | Device(Hardware) | + |___________________| + + +How does it work +---------------- + +Uacce uses mmap and IOMMU to play the trick. + +Uacce creates a chrdev for every device registered to it. New queue is +created when user application open the chrdev. The file descriptor is used +as the user handle of the queue. +The accelerator device present itself as an Uacce object, which exports as +a chrdev to the user space. The user application communicates with the +hardware by ioctl (as control path) or share memory (as data path). + +The control path to the hardware is via file operation, while data path is +via mmap space of the queue fd. + +The queue file address space: + +:: + + /** + * enum uacce_qfrt: qfrt type + * @UACCE_QFRT_MMIO: device mmio region + * @UACCE_QFRT_DUS: device user share region + */ + enum uacce_qfrt { + UACCE_QFRT_MMIO = 0, + UACCE_QFRT_DUS = 1, + }; + +All regions are optional and differ from device type to type. +Each region can be mmapped only once, otherwise -EEXIST returns. + +The device mmio region is mapped to the hardware mmio space. It is generally +used for doorbell or other notification to the hardware. It is not fast enough +as data channel. + +The device user share region is used for share data buffer between user process +and device. + + +The Uacce register API +---------------------- + +The register API is defined in uacce.h. + +:: + + struct uacce_interface { + char name[UACCE_MAX_NAME_SIZE]; + unsigned int flags; + const struct uacce_ops *ops; + }; + +According to the IOMMU capability, uacce_interface flags can be: + +:: + + /** + * UACCE Device flags: + * UACCE_DEV_SVA: Shared Virtual Addresses + * Support PASID + * Support device page faults (PCI PRI or SMMU Stall) + */ + #define UACCE_DEV_SVA BIT(0) + + struct uacce_device *uacce_alloc(struct device *parent, + struct uacce_interface *interface); + int uacce_register(struct uacce_device *uacce); + void uacce_remove(struct uacce_device *uacce); + +uacce_register results can be: + +a. If uacce module is not compiled, ERR_PTR(-ENODEV) + +b. Succeed with the desired flags + +c. Succeed with the negotiated flags, for example + + uacce_interface.flags = UACCE_DEV_SVA but uacce->flags = ~UACCE_DEV_SVA + + So user driver need check return value as well as the negotiated uacce->flags. + + +The user driver +--------------- + +The queue file mmap space will need a user driver to wrap the communication +protocol. Uacce provides some attributes in sysfs for the user driver to +match the right accelerator accordingly. +More details in Documentation/ABI/testing/sysfs-driver-uacce. 
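A hypothetical user-space sketch of the queue life cycle described above (the device node name and the installed header path are illustrative; real region sizes come from the region_*_size sysfs attributes):

	#include <fcntl.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <unistd.h>
	#include <misc/uacce/uacce.h>	/* assumed uapi header install path */

	int main(void)
	{
		long psz = sysconf(_SC_PAGESIZE);
		size_t mmio_size = psz, dus_size = psz; /* read real sizes from sysfs */
		int fd = open("/dev/hisi_zip-0", O_RDWR); /* device name illustrative */
		void *mmio, *dus;

		/* The region type is encoded in the mmap page offset. */
		mmio = mmap(NULL, mmio_size, PROT_READ | PROT_WRITE,
			    MAP_SHARED, fd, UACCE_QFRT_MMIO * psz);
		dus = mmap(NULL, dus_size, PROT_READ | PROT_WRITE,
			   MAP_SHARED, fd, UACCE_QFRT_DUS * psz);

		ioctl(fd, UACCE_CMD_START_Q);	/* queue goes live */
		/* ... ring doorbells via mmio, exchange buffers via dus ... */
		ioctl(fd, UACCE_CMD_PUT_Q);
		close(fd);			/* releases the queue */
		return 0;
	}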
From 015d239ac0142ad0e26567fd890ef8d171f13709 Mon Sep 17 00:00:00 2001
From: Kenneth Lee
Date: Tue, 11 Feb 2020 15:54:23 +0800
Subject: [PATCH 028/162] uacce: add uacce driver

Uacce (Unified/User-space-access-intended Accelerator Framework) aims to
provide Shared Virtual Addressing (SVA) between accelerators and
processes, so the accelerator can access any data structure of the main
CPU. This differs from data sharing between the CPU and an I/O device,
which shares only data content rather than addresses. Thanks to the
unified address space, the hardware and the user space of a process can
share the same virtual addresses when communicating.

Uacce creates a chrdev for every registration; a queue is allocated to
the process when the chrdev is opened. The process can then access the
hardware resources by interacting with the queue file. By mmapping the
queue file into user space, the process can submit requests directly to
the hardware without any syscall into kernel space.

The IOMMU core only tracks mm<->device bonds at the moment, because it
only needs to handle IOTLB invalidation and PASID table entries. However
uacce needs a finer granularity since multiple queues from the same
device can be bound to an mm. When the mm exits, all bound queues must
be stopped so that the IOMMU can safely clear the PASID table entry and
reallocate the PASID.

An intermediate struct uacce_mm links uacce devices and queues. Note
that an mm may be bound to multiple devices but an uacce_mm structure
only ever belongs to a single device, because we don't need anything
more complex (if multiple devices are bound to one mm, then we'll create
one uacce_mm for each bond).

uacce_device --+-- uacce_mm --+-- uacce_queue
               |              '-- uacce_queue
               |
               '-- uacce_mm --+-- uacce_queue
                              +-- uacce_queue
                              '-- uacce_queue

Reviewed-by: Greg Kroah-Hartman
Reviewed-by: Jonathan Cameron
Signed-off-by: Kenneth Lee
Signed-off-by: Zaibo Xu
Signed-off-by: Zhou Wang
Signed-off-by: Jean-Philippe Brucker
Signed-off-by: Zhangfei Gao
Signed-off-by: Herbert Xu
---
 Documentation/ABI/testing/sysfs-driver-uacce | 39 ++
 drivers/misc/Kconfig | 1 +
 drivers/misc/Makefile | 1 +
 drivers/misc/uacce/Kconfig | 13 +
 drivers/misc/uacce/Makefile | 2 +
 drivers/misc/uacce/uacce.c | 617 +++++++++++++++++++
 include/linux/uacce.h | 161 +++++
 include/uapi/misc/uacce/uacce.h | 38 ++
 8 files changed, 872 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-driver-uacce
 create mode 100644 drivers/misc/uacce/Kconfig
 create mode 100644 drivers/misc/uacce/Makefile
 create mode 100644 drivers/misc/uacce/uacce.c
 create mode 100644 include/linux/uacce.h
 create mode 100644 include/uapi/misc/uacce/uacce.h

diff --git a/Documentation/ABI/testing/sysfs-driver-uacce b/Documentation/ABI/testing/sysfs-driver-uacce
new file mode 100644
index 000000000000..08f2591138af
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-uacce
@@ -0,0 +1,39 @@
+What:		/sys/class/uacce/<dev_name>/api
+Date:		Feb 2020
+KernelVersion:	5.7
+Contact:	linux-accelerators@lists.ozlabs.org
+Description:	Api of the device
+		Can be any string and up to userspace to parse.
+		Applications use the api to match the correct driver
+
+What:		/sys/class/uacce/<dev_name>/flags
+Date:		Feb 2020
+KernelVersion:	5.7
+Contact:	linux-accelerators@lists.ozlabs.org
+Description:	Attributes of the device, see UACCE_DEV_xxx flag defined in uacce.h
+
+What:		/sys/class/uacce/<dev_name>/available_instances
+Date:		Feb 2020
+KernelVersion:	5.7
+Contact:	linux-accelerators@lists.ozlabs.org
+Description:	Available instances left of the device
+		Return -ENODEV if uacce_ops get_available_instances is not provided
+
+What:		/sys/class/uacce/<dev_name>/algorithms
+Date:		Feb 2020
+KernelVersion:	5.7
+Contact:	linux-accelerators@lists.ozlabs.org
+Description:	Algorithms supported by this accelerator, separated by new line.
+		Can be any string and up to userspace to parse.
+
+What:		/sys/class/uacce/<dev_name>/region_mmio_size
+Date:		Feb 2020
+KernelVersion:	5.7
+Contact:	linux-accelerators@lists.ozlabs.org
+Description:	Size (bytes) of mmio region queue file
+
+What:		/sys/class/uacce/<dev_name>/region_dus_size
+Date:		Feb 2020
+KernelVersion:	5.7
+Contact:	linux-accelerators@lists.ozlabs.org
+Description:	Size (bytes) of dus region queue file

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 7f0d48f406e3..99e151475d8f 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -480,4 +480,5 @@ source "drivers/misc/cxl/Kconfig"
 source "drivers/misc/ocxl/Kconfig"
 source "drivers/misc/cardreader/Kconfig"
 source "drivers/misc/habanalabs/Kconfig"
+source "drivers/misc/uacce/Kconfig"
 endmenu

diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index c1860d35dc7e..9abf2923d831 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -56,4 +56,5 @@ obj-$(CONFIG_OCXL) += ocxl/
 obj-y += cardreader/
 obj-$(CONFIG_PVPANIC) += pvpanic.o
 obj-$(CONFIG_HABANA_AI) += habanalabs/
+obj-$(CONFIG_UACCE) += uacce/
 obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o

diff --git a/drivers/misc/uacce/Kconfig b/drivers/misc/uacce/Kconfig
new file mode 100644
index 000000000000..5e39b6083b0f
--- /dev/null
+++ b/drivers/misc/uacce/Kconfig
@@ -0,0 +1,13 @@
+config UACCE
+	tristate "Accelerator Framework for User Land"
+	depends on IOMMU_API
+	help
+	  UACCE provides an interface for user processes to access the
+	  hardware without interacting with the kernel space in the data path.
+
+	  The user-space interface is described in
+	  include/uapi/misc/uacce/uacce.h
+
+	  See Documentation/misc-devices/uacce.rst for more details.
+
+	  If you don't know what to do here, say N.
diff --git a/drivers/misc/uacce/Makefile b/drivers/misc/uacce/Makefile new file mode 100644 index 000000000000..5b4374e8b5f2 --- /dev/null +++ b/drivers/misc/uacce/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +obj-$(CONFIG_UACCE) += uacce.o diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c new file mode 100644 index 000000000000..ffced4da3c2c --- /dev/null +++ b/drivers/misc/uacce/uacce.c @@ -0,0 +1,617 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include +#include +#include +#include + +static struct class *uacce_class; +static dev_t uacce_devt; +static DEFINE_MUTEX(uacce_mutex); +static DEFINE_XARRAY_ALLOC(uacce_xa); + +static int uacce_start_queue(struct uacce_queue *q) +{ + int ret = 0; + + mutex_lock(&uacce_mutex); + + if (q->state != UACCE_Q_INIT) { + ret = -EINVAL; + goto out_with_lock; + } + + if (q->uacce->ops->start_queue) { + ret = q->uacce->ops->start_queue(q); + if (ret < 0) + goto out_with_lock; + } + + q->state = UACCE_Q_STARTED; + +out_with_lock: + mutex_unlock(&uacce_mutex); + + return ret; +} + +static int uacce_put_queue(struct uacce_queue *q) +{ + struct uacce_device *uacce = q->uacce; + + mutex_lock(&uacce_mutex); + + if (q->state == UACCE_Q_ZOMBIE) + goto out; + + if ((q->state == UACCE_Q_STARTED) && uacce->ops->stop_queue) + uacce->ops->stop_queue(q); + + if ((q->state == UACCE_Q_INIT || q->state == UACCE_Q_STARTED) && + uacce->ops->put_queue) + uacce->ops->put_queue(q); + + q->state = UACCE_Q_ZOMBIE; +out: + mutex_unlock(&uacce_mutex); + + return 0; +} + +static long uacce_fops_unl_ioctl(struct file *filep, + unsigned int cmd, unsigned long arg) +{ + struct uacce_queue *q = filep->private_data; + struct uacce_device *uacce = q->uacce; + + switch (cmd) { + case UACCE_CMD_START_Q: + return uacce_start_queue(q); + + case UACCE_CMD_PUT_Q: + return uacce_put_queue(q); + + default: + if (!uacce->ops->ioctl) + return -EINVAL; + + return uacce->ops->ioctl(q, cmd, arg); + } +} + +#ifdef CONFIG_COMPAT +static long uacce_fops_compat_ioctl(struct file *filep, + unsigned int cmd, unsigned long arg) +{ + arg = (unsigned long)compat_ptr(arg); + + return uacce_fops_unl_ioctl(filep, cmd, arg); +} +#endif + +static int uacce_sva_exit(struct device *dev, struct iommu_sva *handle, + void *data) +{ + struct uacce_mm *uacce_mm = data; + struct uacce_queue *q; + + /* + * No new queue can be added concurrently because no caller can have a + * reference to this mm. But there may be concurrent calls to + * uacce_mm_put(), so we need the lock. + */ + mutex_lock(&uacce_mm->lock); + list_for_each_entry(q, &uacce_mm->queues, list) + uacce_put_queue(q); + uacce_mm->mm = NULL; + mutex_unlock(&uacce_mm->lock); + + return 0; +} + +static struct iommu_sva_ops uacce_sva_ops = { + .mm_exit = uacce_sva_exit, +}; + +static struct uacce_mm *uacce_mm_get(struct uacce_device *uacce, + struct uacce_queue *q, + struct mm_struct *mm) +{ + struct uacce_mm *uacce_mm = NULL; + struct iommu_sva *handle = NULL; + int ret; + + lockdep_assert_held(&uacce->mm_lock); + + list_for_each_entry(uacce_mm, &uacce->mm_list, list) { + if (uacce_mm->mm == mm) { + mutex_lock(&uacce_mm->lock); + list_add(&q->list, &uacce_mm->queues); + mutex_unlock(&uacce_mm->lock); + return uacce_mm; + } + } + + uacce_mm = kzalloc(sizeof(*uacce_mm), GFP_KERNEL); + if (!uacce_mm) + return NULL; + + if (uacce->flags & UACCE_DEV_SVA) { + /* + * Safe to pass an incomplete uacce_mm, since mm_exit cannot + * fire while we hold a reference to the mm. 
+ */ + handle = iommu_sva_bind_device(uacce->parent, mm, uacce_mm); + if (IS_ERR(handle)) + goto err_free; + + ret = iommu_sva_set_ops(handle, &uacce_sva_ops); + if (ret) + goto err_unbind; + + uacce_mm->pasid = iommu_sva_get_pasid(handle); + if (uacce_mm->pasid == IOMMU_PASID_INVALID) + goto err_unbind; + } + + uacce_mm->mm = mm; + uacce_mm->handle = handle; + INIT_LIST_HEAD(&uacce_mm->queues); + mutex_init(&uacce_mm->lock); + list_add(&q->list, &uacce_mm->queues); + list_add(&uacce_mm->list, &uacce->mm_list); + + return uacce_mm; + +err_unbind: + if (handle) + iommu_sva_unbind_device(handle); +err_free: + kfree(uacce_mm); + return NULL; +} + +static void uacce_mm_put(struct uacce_queue *q) +{ + struct uacce_mm *uacce_mm = q->uacce_mm; + + lockdep_assert_held(&q->uacce->mm_lock); + + mutex_lock(&uacce_mm->lock); + list_del(&q->list); + mutex_unlock(&uacce_mm->lock); + + if (list_empty(&uacce_mm->queues)) { + if (uacce_mm->handle) + iommu_sva_unbind_device(uacce_mm->handle); + list_del(&uacce_mm->list); + kfree(uacce_mm); + } +} + +static int uacce_fops_open(struct inode *inode, struct file *filep) +{ + struct uacce_mm *uacce_mm = NULL; + struct uacce_device *uacce; + struct uacce_queue *q; + int ret = 0; + + uacce = xa_load(&uacce_xa, iminor(inode)); + if (!uacce) + return -ENODEV; + + q = kzalloc(sizeof(struct uacce_queue), GFP_KERNEL); + if (!q) + return -ENOMEM; + + mutex_lock(&uacce->mm_lock); + uacce_mm = uacce_mm_get(uacce, q, current->mm); + mutex_unlock(&uacce->mm_lock); + if (!uacce_mm) { + ret = -ENOMEM; + goto out_with_mem; + } + + q->uacce = uacce; + q->uacce_mm = uacce_mm; + + if (uacce->ops->get_queue) { + ret = uacce->ops->get_queue(uacce, uacce_mm->pasid, q); + if (ret < 0) + goto out_with_mm; + } + + init_waitqueue_head(&q->wait); + filep->private_data = q; + q->state = UACCE_Q_INIT; + + return 0; + +out_with_mm: + mutex_lock(&uacce->mm_lock); + uacce_mm_put(q); + mutex_unlock(&uacce->mm_lock); +out_with_mem: + kfree(q); + return ret; +} + +static int uacce_fops_release(struct inode *inode, struct file *filep) +{ + struct uacce_queue *q = filep->private_data; + struct uacce_device *uacce = q->uacce; + + uacce_put_queue(q); + + mutex_lock(&uacce->mm_lock); + uacce_mm_put(q); + mutex_unlock(&uacce->mm_lock); + + kfree(q); + + return 0; +} + +static void uacce_vma_close(struct vm_area_struct *vma) +{ + struct uacce_queue *q = vma->vm_private_data; + struct uacce_qfile_region *qfr = NULL; + + if (vma->vm_pgoff < UACCE_MAX_REGION) + qfr = q->qfrs[vma->vm_pgoff]; + + kfree(qfr); +} + +static const struct vm_operations_struct uacce_vm_ops = { + .close = uacce_vma_close, +}; + +static int uacce_fops_mmap(struct file *filep, struct vm_area_struct *vma) +{ + struct uacce_queue *q = filep->private_data; + struct uacce_device *uacce = q->uacce; + struct uacce_qfile_region *qfr; + enum uacce_qfrt type = UACCE_MAX_REGION; + int ret = 0; + + if (vma->vm_pgoff < UACCE_MAX_REGION) + type = vma->vm_pgoff; + else + return -EINVAL; + + qfr = kzalloc(sizeof(*qfr), GFP_KERNEL); + if (!qfr) + return -ENOMEM; + + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_WIPEONFORK; + vma->vm_ops = &uacce_vm_ops; + vma->vm_private_data = q; + qfr->type = type; + + mutex_lock(&uacce_mutex); + + if (q->state != UACCE_Q_INIT && q->state != UACCE_Q_STARTED) { + ret = -EINVAL; + goto out_with_lock; + } + + if (q->qfrs[type]) { + ret = -EEXIST; + goto out_with_lock; + } + + switch (type) { + case UACCE_QFRT_MMIO: + if (!uacce->ops->mmap) { + ret = -EINVAL; + goto out_with_lock; + } + + ret = 
uacce->ops->mmap(q, vma, qfr); + if (ret) + goto out_with_lock; + + break; + + case UACCE_QFRT_DUS: + if (!uacce->ops->mmap) { + ret = -EINVAL; + goto out_with_lock; + } + + ret = uacce->ops->mmap(q, vma, qfr); + if (ret) + goto out_with_lock; + break; + + default: + ret = -EINVAL; + goto out_with_lock; + } + + q->qfrs[type] = qfr; + mutex_unlock(&uacce_mutex); + + return ret; + +out_with_lock: + mutex_unlock(&uacce_mutex); + kfree(qfr); + return ret; +} + +static __poll_t uacce_fops_poll(struct file *file, poll_table *wait) +{ + struct uacce_queue *q = file->private_data; + struct uacce_device *uacce = q->uacce; + + poll_wait(file, &q->wait, wait); + if (uacce->ops->is_q_updated && uacce->ops->is_q_updated(q)) + return EPOLLIN | EPOLLRDNORM; + + return 0; +} + +static const struct file_operations uacce_fops = { + .owner = THIS_MODULE, + .open = uacce_fops_open, + .release = uacce_fops_release, + .unlocked_ioctl = uacce_fops_unl_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = uacce_fops_compat_ioctl, +#endif + .mmap = uacce_fops_mmap, + .poll = uacce_fops_poll, +}; + +#define to_uacce_device(dev) container_of(dev, struct uacce_device, dev) + +static ssize_t api_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sprintf(buf, "%s\n", uacce->api_ver); +} + +static ssize_t flags_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sprintf(buf, "%u\n", uacce->flags); +} + +static ssize_t available_instances_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + if (!uacce->ops->get_available_instances) + return -ENODEV; + + return sprintf(buf, "%d\n", + uacce->ops->get_available_instances(uacce)); +} + +static ssize_t algorithms_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sprintf(buf, "%s\n", uacce->algs); +} + +static ssize_t region_mmio_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sprintf(buf, "%lu\n", + uacce->qf_pg_num[UACCE_QFRT_MMIO] << PAGE_SHIFT); +} + +static ssize_t region_dus_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + return sprintf(buf, "%lu\n", + uacce->qf_pg_num[UACCE_QFRT_DUS] << PAGE_SHIFT); +} + +static DEVICE_ATTR_RO(api); +static DEVICE_ATTR_RO(flags); +static DEVICE_ATTR_RO(available_instances); +static DEVICE_ATTR_RO(algorithms); +static DEVICE_ATTR_RO(region_mmio_size); +static DEVICE_ATTR_RO(region_dus_size); + +static struct attribute *uacce_dev_attrs[] = { + &dev_attr_api.attr, + &dev_attr_flags.attr, + &dev_attr_available_instances.attr, + &dev_attr_algorithms.attr, + &dev_attr_region_mmio_size.attr, + &dev_attr_region_dus_size.attr, + NULL, +}; + +static umode_t uacce_dev_is_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct uacce_device *uacce = to_uacce_device(dev); + + if (((attr == &dev_attr_region_mmio_size.attr) && + (!uacce->qf_pg_num[UACCE_QFRT_MMIO])) || + ((attr == &dev_attr_region_dus_size.attr) && + (!uacce->qf_pg_num[UACCE_QFRT_DUS]))) + return 0; + + return attr->mode; +} + +static struct attribute_group uacce_dev_group = { + .is_visible = 
uacce_dev_is_visible, + .attrs = uacce_dev_attrs, +}; + +__ATTRIBUTE_GROUPS(uacce_dev); + +static void uacce_release(struct device *dev) +{ + struct uacce_device *uacce = to_uacce_device(dev); + + kfree(uacce); +} + +/** + * uacce_alloc() - alloc an accelerator + * @parent: pointer of uacce parent device + * @interface: pointer of uacce_interface for register + * + * Returns uacce pointer if success and ERR_PTR if not + * Need check returned negotiated uacce->flags + */ +struct uacce_device *uacce_alloc(struct device *parent, + struct uacce_interface *interface) +{ + unsigned int flags = interface->flags; + struct uacce_device *uacce; + int ret; + + uacce = kzalloc(sizeof(struct uacce_device), GFP_KERNEL); + if (!uacce) + return ERR_PTR(-ENOMEM); + + if (flags & UACCE_DEV_SVA) { + ret = iommu_dev_enable_feature(parent, IOMMU_DEV_FEAT_SVA); + if (ret) + flags &= ~UACCE_DEV_SVA; + } + + uacce->parent = parent; + uacce->flags = flags; + uacce->ops = interface->ops; + + ret = xa_alloc(&uacce_xa, &uacce->dev_id, uacce, xa_limit_32b, + GFP_KERNEL); + if (ret < 0) + goto err_with_uacce; + + INIT_LIST_HEAD(&uacce->mm_list); + mutex_init(&uacce->mm_lock); + device_initialize(&uacce->dev); + uacce->dev.devt = MKDEV(MAJOR(uacce_devt), uacce->dev_id); + uacce->dev.class = uacce_class; + uacce->dev.groups = uacce_dev_groups; + uacce->dev.parent = uacce->parent; + uacce->dev.release = uacce_release; + dev_set_name(&uacce->dev, "%s-%d", interface->name, uacce->dev_id); + + return uacce; + +err_with_uacce: + if (flags & UACCE_DEV_SVA) + iommu_dev_disable_feature(uacce->parent, IOMMU_DEV_FEAT_SVA); + kfree(uacce); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(uacce_alloc); + +/** + * uacce_register() - add the accelerator to cdev and export to user space + * @uacce: The initialized uacce device + * + * Return 0 if register succeeded, or an error. + */ +int uacce_register(struct uacce_device *uacce) +{ + if (!uacce) + return -ENODEV; + + uacce->cdev = cdev_alloc(); + if (!uacce->cdev) + return -ENOMEM; + + uacce->cdev->ops = &uacce_fops; + uacce->cdev->owner = THIS_MODULE; + + return cdev_device_add(uacce->cdev, &uacce->dev); +} +EXPORT_SYMBOL_GPL(uacce_register); + +/** + * uacce_remove() - remove the accelerator + * @uacce: the accelerator to remove + */ +void uacce_remove(struct uacce_device *uacce) +{ + struct uacce_mm *uacce_mm; + struct uacce_queue *q; + + if (!uacce) + return; + + /* ensure no open queue remains */ + mutex_lock(&uacce->mm_lock); + list_for_each_entry(uacce_mm, &uacce->mm_list, list) { + /* + * We don't take the uacce_mm->lock here. Since we hold the + * device's mm_lock, no queue can be added to or removed from + * this uacce_mm. We may run concurrently with mm_exit, but + * uacce_put_queue() is serialized and iommu_sva_unbind_device() + * waits for the lock that mm_exit is holding. 
+ */ + list_for_each_entry(q, &uacce_mm->queues, list) + uacce_put_queue(q); + + if (uacce->flags & UACCE_DEV_SVA) { + iommu_sva_unbind_device(uacce_mm->handle); + uacce_mm->handle = NULL; + } + } + mutex_unlock(&uacce->mm_lock); + + /* disable sva now since no opened queues */ + if (uacce->flags & UACCE_DEV_SVA) + iommu_dev_disable_feature(uacce->parent, IOMMU_DEV_FEAT_SVA); + + if (uacce->cdev) + cdev_device_del(uacce->cdev, &uacce->dev); + xa_erase(&uacce_xa, uacce->dev_id); + put_device(&uacce->dev); +} +EXPORT_SYMBOL_GPL(uacce_remove); + +static int __init uacce_init(void) +{ + int ret; + + uacce_class = class_create(THIS_MODULE, UACCE_NAME); + if (IS_ERR(uacce_class)) + return PTR_ERR(uacce_class); + + ret = alloc_chrdev_region(&uacce_devt, 0, MINORMASK, UACCE_NAME); + if (ret) + class_destroy(uacce_class); + + return ret; +} + +static __exit void uacce_exit(void) +{ + unregister_chrdev_region(uacce_devt, MINORMASK); + class_destroy(uacce_class); +} + +subsys_initcall(uacce_init); +module_exit(uacce_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Hisilicon Tech. Co., Ltd."); +MODULE_DESCRIPTION("Accelerator interface for Userland applications"); diff --git a/include/linux/uacce.h b/include/linux/uacce.h new file mode 100644 index 000000000000..904a461591a3 --- /dev/null +++ b/include/linux/uacce.h @@ -0,0 +1,161 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _LINUX_UACCE_H +#define _LINUX_UACCE_H + +#include <linux/cdev.h> +#include <uapi/misc/uacce/uacce.h> + +#define UACCE_NAME "uacce" +#define UACCE_MAX_REGION 2 +#define UACCE_MAX_NAME_SIZE 64 + +struct uacce_queue; +struct uacce_device; + +/** + * struct uacce_qfile_region - structure of queue file region + * @type: type of the region + */ +struct uacce_qfile_region { + enum uacce_qfrt type; +}; + +/** + * struct uacce_ops - uacce device operations + * @get_available_instances: get available instances left of the device + * @get_queue: get a queue from the device + * @put_queue: free a queue to the device + * @start_queue: make the queue start work after get_queue + * @stop_queue: make the queue stop work before put_queue + * @is_q_updated: check whether the task is finished + * @mmap: mmap addresses of queue to user space + * @ioctl: ioctl for user space users of the queue + */ +struct uacce_ops { + int (*get_available_instances)(struct uacce_device *uacce); + int (*get_queue)(struct uacce_device *uacce, unsigned long arg, + struct uacce_queue *q); + void (*put_queue)(struct uacce_queue *q); + int (*start_queue)(struct uacce_queue *q); + void (*stop_queue)(struct uacce_queue *q); + int (*is_q_updated)(struct uacce_queue *q); + int (*mmap)(struct uacce_queue *q, struct vm_area_struct *vma, + struct uacce_qfile_region *qfr); + long (*ioctl)(struct uacce_queue *q, unsigned int cmd, + unsigned long arg); +}; + +/** + * struct uacce_interface - interface required for uacce_register() + * @name: the uacce device name. Will show up in sysfs + * @flags: uacce device attributes + * @ops: pointer to the struct uacce_ops + */ +struct uacce_interface { + char name[UACCE_MAX_NAME_SIZE]; + unsigned int flags; + const struct uacce_ops *ops; +}; + +enum uacce_q_state { + UACCE_Q_ZOMBIE = 0, + UACCE_Q_INIT, + UACCE_Q_STARTED, +}; + +/** + * struct uacce_queue + * @uacce: pointer to uacce + * @priv: private pointer + * @wait: wait queue head + * @list: index into uacce_mm + * @uacce_mm: the corresponding mm + * @qfrs: pointer of qfr regions + * @state: queue state machine + */ +struct uacce_queue { + struct uacce_device *uacce; + void *priv; + wait_queue_head_t wait; + struct list_head list; + struct uacce_mm *uacce_mm; + struct uacce_qfile_region *qfrs[UACCE_MAX_REGION]; + enum uacce_q_state state; +}; + +/** + * struct uacce_device + * @algs: supported algorithms + * @api_ver: api version + * @ops: pointer to the struct uacce_ops + * @qf_pg_num: page numbers of the queue file regions + * @parent: pointer to the parent device + * @is_vf: whether virtual function + * @flags: uacce attributes + * @dev_id: id of the uacce device + * @cdev: cdev of the uacce + * @dev: dev of the uacce + * @priv: private pointer of the uacce + * @mm_list: list head of uacce_mm->list + * @mm_lock: lock for mm_list + */ +struct uacce_device { + const char *algs; + const char *api_ver; + const struct uacce_ops *ops; + unsigned long qf_pg_num[UACCE_MAX_REGION]; + struct device *parent; + bool is_vf; + u32 flags; + u32 dev_id; + struct cdev *cdev; + struct device dev; + void *priv; + struct list_head mm_list; + struct mutex mm_lock; +}; + +/** + * struct uacce_mm - keep track of queues bound to a process + * @list: index into uacce_device + * @queues: list of queues + * @mm: the mm struct + * @lock: protects the list of queues + * @pasid: pasid of the uacce_mm + * @handle: iommu_sva handle return from iommu_sva_bind_device + */ +struct uacce_mm { + struct list_head list; + struct list_head queues; + struct mm_struct *mm; + struct mutex lock; + int pasid; + struct iommu_sva *handle; +}; + +#if IS_ENABLED(CONFIG_UACCE) + +struct uacce_device *uacce_alloc(struct device *parent, + struct uacce_interface *interface); +int uacce_register(struct uacce_device *uacce); +void uacce_remove(struct uacce_device *uacce); + +#else /* CONFIG_UACCE */ + +static inline +struct uacce_device *uacce_alloc(struct device *parent, + struct uacce_interface *interface) +{ + return ERR_PTR(-ENODEV); +} + +static inline int uacce_register(struct uacce_device *uacce) +{ + return -EINVAL; +} + +static inline void uacce_remove(struct uacce_device *uacce) {} + +#endif /* CONFIG_UACCE */ + +#endif /* _LINUX_UACCE_H */ diff --git a/include/uapi/misc/uacce/uacce.h b/include/uapi/misc/uacce/uacce.h new file mode 100644 index 000000000000..cc7185678f47 --- /dev/null +++ b/include/uapi/misc/uacce/uacce.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +#ifndef _UAPIUUACCE_H +#define _UAPIUUACCE_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +/* + * UACCE_CMD_START_Q: Start queue + */ +#define UACCE_CMD_START_Q _IO('W', 0) + +/* + * UACCE_CMD_PUT_Q: + * User actively stop queue and free queue resource immediately + * Optimization method since close fd may delay + */ +#define UACCE_CMD_PUT_Q _IO('W', 1) + +/* + * UACCE Device flags: + * UACCE_DEV_SVA: Shared Virtual Addresses + * Support PASID + * Support device page faults (PCI PRI or SMMU Stall) + */ +#define UACCE_DEV_SVA BIT(0) + +/** + * enum uacce_qfrt: queue file region type + * @UACCE_QFRT_MMIO: device mmio
region + * @UACCE_QFRT_DUS: device user share region + */ +enum uacce_qfrt { + UACCE_QFRT_MMIO = 0, + UACCE_QFRT_DUS = 1, +}; + +#endif From 18bead70e9919bb2d1826c4070f2982dd63e2fcc Mon Sep 17 00:00:00 2001 From: Zhangfei Gao Date: Tue, 11 Feb 2020 15:54:24 +0800 Subject: [PATCH 029/162] crypto: hisilicon - Remove module_param uacce_mode Remove the module_param uacce_mode, which is not used currently. Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jonathan Cameron Signed-off-by: Zhangfei Gao Signed-off-by: Zhou Wang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/zip/zip_main.c | 31 +++++-------------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index f4aec18a25b4..868e32c2d4e5 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -301,9 +301,6 @@ static u32 pf_q_num = HZIP_PF_DEF_Q_NUM; module_param_cb(pf_q_num, &pf_q_num_ops, &pf_q_num, 0444); MODULE_PARM_DESC(pf_q_num, "Number of queues in PF(v1 1-4096, v2 1-1024)"); -static int uacce_mode; -module_param(uacce_mode, int, 0); - static u32 vfs_num; module_param(vfs_num, uint, 0444); MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63)"); @@ -841,6 +838,7 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, hisi_zip); qm = &hisi_zip->qm; + qm->use_dma_api = true; qm->pdev = pdev; qm->ver = rev_id; @@ -848,20 +846,6 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) qm->dev_name = hisi_zip_name; qm->fun_type = (pdev->device == PCI_DEVICE_ID_ZIP_PF) ? QM_HW_PF : QM_HW_VF; - switch (uacce_mode) { - case 0: - qm->use_dma_api = true; - break; - case 1: - qm->use_dma_api = false; - break; - case 2: - qm->use_dma_api = true; - break; - default: - return -EINVAL; - } - ret = hisi_qm_init(qm); if (ret) { dev_err(&pdev->dev, "Failed to init qm!\n"); @@ -981,12 +965,10 @@ static int __init hisi_zip_init(void) goto err_pci; } - if (uacce_mode == 0 || uacce_mode == 2) { - ret = hisi_zip_register_to_crypto(); - if (ret < 0) { - pr_err("Failed to register driver to crypto.\n"); - goto err_crypto; - } + ret = hisi_zip_register_to_crypto(); + if (ret < 0) { + pr_err("Failed to register driver to crypto.\n"); + goto err_crypto; } return 0; @@ -1001,8 +983,7 @@ err_pci: static void __exit hisi_zip_exit(void) { - if (uacce_mode == 0 || uacce_mode == 2) - hisi_zip_unregister_from_crypto(); + hisi_zip_unregister_from_crypto(); pci_unregister_driver(&hisi_zip_pci_driver); hisi_zip_unregister_debugfs(); } From 9e00df7156e45e42c695ffc596b4bf1328d00516 Mon Sep 17 00:00:00 2001 From: Zhangfei Gao Date: Tue, 11 Feb 2020 15:54:25 +0800 Subject: [PATCH 030/162] crypto: hisilicon - register zip engine to uacce Register qm to uacce framework for user crypto driver Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jonathan Cameron Signed-off-by: Zhangfei Gao Signed-off-by: Zhou Wang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 239 +++++++++++++++++++++++- drivers/crypto/hisilicon/qm.h | 11 ++ drivers/crypto/hisilicon/zip/zip_main.c | 18 +- include/uapi/misc/uacce/hisi_qm.h | 23 +++ 4 files changed, 283 insertions(+), 8 deletions(-) create mode 100644 include/uapi/misc/uacce/hisi_qm.h diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 79f84dc63a43..ad7146a0c577 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -9,6 +9,9 @@ #include #include #include 
+#include +#include +#include #include "qm.h" /* eq/aeq irq enable */ @@ -466,9 +469,14 @@ static void qm_cq_head_update(struct hisi_qp *qp) static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm) { - struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head; + if (qp->event_cb) { + qp->event_cb(qp); + return; + } if (qp->req_cb) { + struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head; + while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) { dma_rmb(); qp->req_cb(qp, qp->sqe + qm->sqe_size * @@ -1273,7 +1281,7 @@ static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, int pasid) * @qp: The qp we want to start to run. * @arg: Accelerator specific argument. * - * After this function, qp can receive request from user. Return qp_id if + * After this function, qp can receive request from user. Return 0 if * successful, Return -EBUSY if failed. */ int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg) @@ -1318,7 +1326,7 @@ int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg) dev_dbg(dev, "queue %d started\n", qp_id); - return qp_id; + return 0; } EXPORT_SYMBOL_GPL(hisi_qm_start_qp); @@ -1399,6 +1407,214 @@ static void hisi_qm_cache_wb(struct hisi_qm *qm) } } +static void qm_qp_event_notifier(struct hisi_qp *qp) +{ + wake_up_interruptible(&qp->uacce_q->wait); +} + +static int hisi_qm_get_available_instances(struct uacce_device *uacce) +{ + int i, ret; + struct hisi_qm *qm = uacce->priv; + + read_lock(&qm->qps_lock); + for (i = 0, ret = 0; i < qm->qp_num; i++) + if (!qm->qp_array[i]) + ret++; + read_unlock(&qm->qps_lock); + + return ret; +} + +static int hisi_qm_uacce_get_queue(struct uacce_device *uacce, + unsigned long arg, + struct uacce_queue *q) +{ + struct hisi_qm *qm = uacce->priv; + struct hisi_qp *qp; + u8 alg_type = 0; + + qp = hisi_qm_create_qp(qm, alg_type); + if (IS_ERR(qp)) + return PTR_ERR(qp); + + q->priv = qp; + q->uacce = uacce; + qp->uacce_q = q; + qp->event_cb = qm_qp_event_notifier; + qp->pasid = arg; + + return 0; +} + +static void hisi_qm_uacce_put_queue(struct uacce_queue *q) +{ + struct hisi_qp *qp = q->priv; + + hisi_qm_cache_wb(qp->qm); + hisi_qm_release_qp(qp); +} + +/* map sq/cq/doorbell to user space */ +static int hisi_qm_uacce_mmap(struct uacce_queue *q, + struct vm_area_struct *vma, + struct uacce_qfile_region *qfr) +{ + struct hisi_qp *qp = q->priv; + struct hisi_qm *qm = qp->qm; + size_t sz = vma->vm_end - vma->vm_start; + struct pci_dev *pdev = qm->pdev; + struct device *dev = &pdev->dev; + unsigned long vm_pgoff; + int ret; + + switch (qfr->type) { + case UACCE_QFRT_MMIO: + if (qm->ver == QM_HW_V2) { + if (sz > PAGE_SIZE * (QM_DOORBELL_PAGE_NR + + QM_DOORBELL_SQ_CQ_BASE_V2 / PAGE_SIZE)) + return -EINVAL; + } else { + if (sz > PAGE_SIZE * QM_DOORBELL_PAGE_NR) + return -EINVAL; + } + + vma->vm_flags |= VM_IO; + + return remap_pfn_range(vma, vma->vm_start, + qm->phys_base >> PAGE_SHIFT, + sz, pgprot_noncached(vma->vm_page_prot)); + case UACCE_QFRT_DUS: + if (sz != qp->qdma.size) + return -EINVAL; + + /* + * dma_mmap_coherent() requires vm_pgoff as 0 + * restore vm_pfoff to initial value for mmap() + */ + vm_pgoff = vma->vm_pgoff; + vma->vm_pgoff = 0; + ret = dma_mmap_coherent(dev, vma, qp->qdma.va, + qp->qdma.dma, sz); + vma->vm_pgoff = vm_pgoff; + return ret; + + default: + return -EINVAL; + } +} + +static int hisi_qm_uacce_start_queue(struct uacce_queue *q) +{ + struct hisi_qp *qp = q->priv; + + return hisi_qm_start_qp(qp, qp->pasid); +} + +static void hisi_qm_uacce_stop_queue(struct uacce_queue *q) +{ + hisi_qm_stop_qp(q->priv); +} + +static int 
qm_set_sqctype(struct uacce_queue *q, u16 type) +{ + struct hisi_qm *qm = q->uacce->priv; + struct hisi_qp *qp = q->priv; + + write_lock(&qm->qps_lock); + qp->alg_type = type; + write_unlock(&qm->qps_lock); + + return 0; +} + +static long hisi_qm_uacce_ioctl(struct uacce_queue *q, unsigned int cmd, + unsigned long arg) +{ + struct hisi_qp *qp = q->priv; + struct hisi_qp_ctx qp_ctx; + + if (cmd == UACCE_CMD_QM_SET_QP_CTX) { + if (copy_from_user(&qp_ctx, (void __user *)arg, + sizeof(struct hisi_qp_ctx))) + return -EFAULT; + + if (qp_ctx.qc_type != 0 && qp_ctx.qc_type != 1) + return -EINVAL; + + qm_set_sqctype(q, qp_ctx.qc_type); + qp_ctx.id = qp->qp_id; + + if (copy_to_user((void __user *)arg, &qp_ctx, + sizeof(struct hisi_qp_ctx))) + return -EFAULT; + } else { + return -EINVAL; + } + + return 0; +} + +static const struct uacce_ops uacce_qm_ops = { + .get_available_instances = hisi_qm_get_available_instances, + .get_queue = hisi_qm_uacce_get_queue, + .put_queue = hisi_qm_uacce_put_queue, + .start_queue = hisi_qm_uacce_start_queue, + .stop_queue = hisi_qm_uacce_stop_queue, + .mmap = hisi_qm_uacce_mmap, + .ioctl = hisi_qm_uacce_ioctl, +}; + +static int qm_alloc_uacce(struct hisi_qm *qm) +{ + struct pci_dev *pdev = qm->pdev; + struct uacce_device *uacce; + unsigned long mmio_page_nr; + unsigned long dus_page_nr; + struct uacce_interface interface = { + .flags = UACCE_DEV_SVA, + .ops = &uacce_qm_ops, + }; + + strncpy(interface.name, pdev->driver->name, sizeof(interface.name)); + + uacce = uacce_alloc(&pdev->dev, &interface); + if (IS_ERR(uacce)) + return PTR_ERR(uacce); + + if (uacce->flags & UACCE_DEV_SVA) { + qm->use_sva = true; + } else { + /* only consider sva case */ + uacce_remove(uacce); + qm->uacce = NULL; + return -EINVAL; + } + + uacce->is_vf = pdev->is_virtfn; + uacce->priv = qm; + uacce->algs = qm->algs; + + if (qm->ver == QM_HW_V1) { + mmio_page_nr = QM_DOORBELL_PAGE_NR; + uacce->api_ver = HISI_QM_API_VER_BASE; + } else { + mmio_page_nr = QM_DOORBELL_PAGE_NR + + QM_DOORBELL_SQ_CQ_BASE_V2 / PAGE_SIZE; + uacce->api_ver = HISI_QM_API_VER2_BASE; + } + + dus_page_nr = (PAGE_SIZE - 1 + qm->sqe_size * QM_Q_DEPTH + + sizeof(struct qm_cqe) * QM_Q_DEPTH) >> PAGE_SHIFT; + + uacce->qf_pg_num[UACCE_QFRT_MMIO] = mmio_page_nr; + uacce->qf_pg_num[UACCE_QFRT_DUS] = dus_page_nr; + + qm->uacce = uacce; + + return 0; +} + /** * hisi_qm_get_free_qp_num() - Get free number of qp in qm. * @qm: The qm which want to get free qp. 
@@ -1441,10 +1657,14 @@ int hisi_qm_init(struct hisi_qm *qm) return -EINVAL; } + ret = qm_alloc_uacce(qm); + if (ret < 0) + dev_warn(&pdev->dev, "fail to alloc uacce (%d)\n", ret); + ret = pci_enable_device_mem(pdev); if (ret < 0) { dev_err(&pdev->dev, "Failed to enable device mem!\n"); - return ret; + goto err_remove_uacce; } ret = pci_request_mem_regions(pdev, qm->dev_name); @@ -1453,8 +1673,9 @@ int hisi_qm_init(struct hisi_qm *qm) goto err_disable_pcidev; } - qm->io_base = ioremap(pci_resource_start(pdev, PCI_BAR_2), - pci_resource_len(qm->pdev, PCI_BAR_2)); + qm->phys_base = pci_resource_start(pdev, PCI_BAR_2); + qm->phys_size = pci_resource_len(qm->pdev, PCI_BAR_2); + qm->io_base = ioremap(qm->phys_base, qm->phys_size); if (!qm->io_base) { ret = -EIO; goto err_release_mem_regions; @@ -1497,6 +1718,9 @@ err_release_mem_regions: pci_release_mem_regions(pdev); err_disable_pcidev: pci_disable_device(pdev); +err_remove_uacce: + uacce_remove(qm->uacce); + qm->uacce = NULL; return ret; } @@ -1513,6 +1737,9 @@ void hisi_qm_uninit(struct hisi_qm *qm) struct pci_dev *pdev = qm->pdev; struct device *dev = &pdev->dev; + uacce_remove(qm->uacce); + qm->uacce = NULL; + if (qm->use_dma_api && qm->qdma.va) { hisi_qm_cache_wb(qm); dma_free_coherent(dev, qm->qdma.size, diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index cae26eabd849..1a4f208a9448 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -77,6 +77,9 @@ #define HISI_ACC_SGL_SGE_NR_MAX 255 +/* page number for queue file region */ +#define QM_DOORBELL_PAGE_NR 1 + enum qp_state { QP_STOP, }; @@ -180,7 +183,12 @@ struct hisi_qm { u32 error_mask; u32 msi_mask; + const char *algs; bool use_dma_api; + bool use_sva; + resource_size_t phys_base; + resource_size_t phys_size; + struct uacce_device *uacce; }; struct hisi_qp_status { @@ -210,10 +218,13 @@ struct hisi_qp { struct hisi_qp_ops *hw_ops; void *qp_ctx; void (*req_cb)(struct hisi_qp *qp, void *data); + void (*event_cb)(struct hisi_qp *qp); struct work_struct work; struct workqueue_struct *wq; struct hisi_qm *qm; + u16 pasid; + struct uacce_queue *uacce_q; }; int hisi_qm_init(struct hisi_qm *qm); diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 868e32c2d4e5..25a311248fb8 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "zip.h" #define PCI_DEVICE_ID_ZIP_PF 0xa250 @@ -354,8 +355,14 @@ static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip) writel(AXUSER_BASE, base + HZIP_BD_RUSER_32_63); writel(AXUSER_BASE, base + HZIP_SGL_RUSER_32_63); writel(AXUSER_BASE, base + HZIP_BD_WUSER_32_63); - writel(AXUSER_BASE, base + HZIP_DATA_RUSER_32_63); - writel(AXUSER_BASE, base + HZIP_DATA_WUSER_32_63); + + if (hisi_zip->qm.use_sva) { + writel(AXUSER_BASE | AXUSER_SSV, base + HZIP_DATA_RUSER_32_63); + writel(AXUSER_BASE | AXUSER_SSV, base + HZIP_DATA_WUSER_32_63); + } else { + writel(AXUSER_BASE, base + HZIP_DATA_RUSER_32_63); + writel(AXUSER_BASE, base + HZIP_DATA_WUSER_32_63); + } /* let's open all compression/decompression cores */ writel(DECOMP_CHECK_ENABLE | ALL_COMP_DECOMP_EN, @@ -842,6 +849,7 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) qm->pdev = pdev; qm->ver = rev_id; + qm->algs = "zlib\ngzip"; qm->sqe_size = HZIP_SQE_SIZE; qm->dev_name = hisi_zip_name; qm->fun_type = (pdev->device == PCI_DEVICE_ID_ZIP_PF) ? 
QM_HW_PF : @@ -885,6 +893,12 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) hisi_zip_add_to_list(hisi_zip); + if (qm->uacce) { + ret = uacce_register(qm->uacce); + if (ret) + goto err_qm_uninit; + } + if (qm->fun_type == QM_HW_PF && vfs_num > 0) { ret = hisi_zip_sriov_enable(pdev, vfs_num); if (ret < 0) diff --git a/include/uapi/misc/uacce/hisi_qm.h b/include/uapi/misc/uacce/hisi_qm.h new file mode 100644 index 000000000000..6435f0bcb556 --- /dev/null +++ b/include/uapi/misc/uacce/hisi_qm.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +#ifndef _UAPI_HISI_QM_H +#define _UAPI_HISI_QM_H + +#include + +/** + * struct hisi_qp_ctx - User data for hisi qp. + * @id: qp_index return to user space + * @qc_type: Accelerator algorithm type + */ +struct hisi_qp_ctx { + __u16 id; + __u16 qc_type; +}; + +#define HISI_QM_API_VER_BASE "hisi_qm_v1" +#define HISI_QM_API_VER2_BASE "hisi_qm_v2" + +/* UACCE_CMD_QM_SET_QP_CTX: Set qp algorithm type */ +#define UACCE_CMD_QM_SET_QP_CTX _IOWR('H', 10, struct hisi_qp_ctx) + +#endif From 30332eeefec8f83afcea00c360f99ef64b87f220 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:18:55 +0100 Subject: [PATCH 031/162] debugfs: regset32: Add Runtime PM support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hardware registers of devices under control of power management cannot be accessed at all times. If such a device is suspended, register accesses may lead to undefined behavior, like reading bogus values, or causing exceptions or system lock-ups. Extend struct debugfs_regset32 with an optional field to let device drivers specify the device the registers in the set belong to. This allows debugfs_show_regset32() to make sure the device is resumed while its registers are being read. Signed-off-by: Geert Uytterhoeven Reviewed-by: Niklas Söderlund Reviewed-by: Greg Kroah-Hartman Acked-by: Rafael J. Wysocki Signed-off-by: Herbert Xu --- fs/debugfs/file.c | 8 ++++++++ include/linux/debugfs.h | 1 + 2 files changed, 9 insertions(+) diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 634b09d18b77..204734f8d1c6 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -1060,7 +1061,14 @@ static int debugfs_show_regset32(struct seq_file *s, void *data) { struct debugfs_regset32 *regset = s->private; + if (regset->dev) + pm_runtime_get_sync(regset->dev); + debugfs_print_regs32(s, regset->regs, regset->nregs, regset->base, ""); + + if (regset->dev) + pm_runtime_put(regset->dev); + return 0; } diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 3d013de64f70..ad416853e722 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -35,6 +35,7 @@ struct debugfs_regset32 { const struct debugfs_reg32 *regs; int nregs; void __iomem *base; + struct device *dev; /* Optional device for Runtime PM */ }; extern struct dentry *arch_debugfs_dir; From f5f7e1a049e685045a0a6cfd508aab79621446b8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:18:56 +0100 Subject: [PATCH 032/162] crypto: ccree - fix debugfs register access while suspended MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reading the debugfs files under /sys/kernel/debug/ccree/ can be done by the user at any time. 
On R-Car SoCs, the CCREE device is power-managed using a module clock, and if this clock is not running, bogus register values may be read. Fix this by filling in the debugfs_regset32.dev field, so debugfs will make sure the device is resumed while its registers are being read. This fixes the bogus values (0x00000260) in the register dumps on R-Car H3 ES1.0: -e6601000.crypto/regs:HOST_IRR = 0x00000260 -e6601000.crypto/regs:HOST_POWER_DOWN_EN = 0x00000260 +e6601000.crypto/regs:HOST_IRR = 0x00000038 +e6601000.crypto/regs:HOST_POWER_DOWN_EN = 0x00000038 e6601000.crypto/regs:AXIM_MON_ERR = 0x00000000 e6601000.crypto/regs:DSCRPTR_QUEUE_CONTENT = 0x000002aa -e6601000.crypto/regs:HOST_IMR = 0x00000260 +e6601000.crypto/regs:HOST_IMR = 0x017ffeff e6601000.crypto/regs:AXIM_CFG = 0x001f0007 e6601000.crypto/regs:AXIM_CACHE_PARAMS = 0x00000000 -e6601000.crypto/regs:GPR_HOST = 0x00000260 +e6601000.crypto/regs:GPR_HOST = 0x017ffeff e6601000.crypto/regs:AXIM_MON_COMP = 0x00000000 -e6601000.crypto/version:SIGNATURE = 0x00000260 -e6601000.crypto/version:VERSION = 0x00000260 +e6601000.crypto/version:SIGNATURE = 0xdcc63000 +e6601000.crypto/version:VERSION = 0xaf400001 Note that this behavior is system-dependent, and the issue does not show up on all R-Car Gen3 SoCs and boards. Even when the device is suspended, the module clock may be left enabled, if configured by the firmware for Secure Mode, or when controlled by the Real-Time Core. Signed-off-by: Geert Uytterhoeven Acked-by: Gilad Ben-Yossef Acked-by: Rafael J. Wysocki Reviewed-by: Niklas Söderlund Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_debugfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/crypto/ccree/cc_debugfs.c b/drivers/crypto/ccree/cc_debugfs.c index 566999738698..35f3a2137502 100644 --- a/drivers/crypto/ccree/cc_debugfs.c +++ b/drivers/crypto/ccree/cc_debugfs.c @@ -81,6 +81,7 @@ int cc_debugfs_init(struct cc_drvdata *drvdata) regset->regs = debug_regs; regset->nregs = ARRAY_SIZE(debug_regs); regset->base = drvdata->cc_base; + regset->dev = dev; ctx->dir = debugfs_create_dir(drvdata->plat_dev->name, cc_debugfs_dir); @@ -102,6 +103,7 @@ int cc_debugfs_init(struct cc_drvdata *drvdata) verset->nregs = ARRAY_SIZE(pid_cid_regs); } verset->base = drvdata->cc_base; + verset->dev = dev; debugfs_create_regset32("version", 0400, ctx->dir, verset); From b83fd3e5ec284453561784491d877ceff950d1c6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:18:57 +0100 Subject: [PATCH 033/162] crypto: ccree - fix retry handling in cc_send_sync_request() If cc_queues_status() indicates that the queue is full, cc_send_sync_request() should loop and retry. However, cc_queues_status() returns either 0 (for success), or -ENOSPC (for queue full), while cc_send_sync_request() checks for real errors by comparing with -EAGAIN. Hence -ENOSPC is always considered a real error, and the code never retries the operation. Fix this by just removing the check, as cc_queues_status() never returns any other error value than -ENOSPC.
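The fixed function then reduces to the intended "wait until the queue drains" loop. As a rough stand-alone sketch of that control flow (plain C; queues_status() is a made-up stand-in for the real cc_queues_status(), and a counter replaces the completion wait, so this is an illustration, not driver code):

    #include <errno.h>
    #include <stdio.h>

    /* Stand-in for cc_queues_status(): 0 on success, -ENOSPC when full. */
    static int queues_status(int free_slots)
    {
            return free_slots > 0 ? 0 : -ENOSPC;
    }

    int main(void)
    {
            int free_slots = 0;
            int rc;

            /*
             * Retry while the queue is full. With the old rc != -EAGAIN
             * check, -ENOSPC returned early and this loop never retried.
             */
            while ((rc = queues_status(free_slots)) == -ENOSPC)
                    free_slots++;   /* driver: wait_for_completion_interruptible() */

            printf("rc = %d once space is available\n", rc);
            return 0;
    }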
Signed-off-by: Geert Uytterhoeven Acked-by: Gilad Ben-Yossef Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_request_mgr.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/crypto/ccree/cc_request_mgr.c b/drivers/crypto/ccree/cc_request_mgr.c index 9d61e6f12478..b2a18122f320 100644 --- a/drivers/crypto/ccree/cc_request_mgr.c +++ b/drivers/crypto/ccree/cc_request_mgr.c @@ -476,10 +476,6 @@ int cc_send_sync_request(struct cc_drvdata *drvdata, break; spin_unlock_bh(&mgr->hw_lock); - if (rc != -EAGAIN) { - cc_pm_put_suspend(dev); - return rc; - } wait_for_completion_interruptible(&drvdata->hw_queue_avail); reinit_completion(&drvdata->hw_queue_avail); } From f4274eeca476305a78f140e2e74bd07bd379a5da Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:18:58 +0100 Subject: [PATCH 034/162] crypto: ccree - remove unneeded casts Unneeded casts prevent the compiler from performing valuable checks. This is especially true for function pointers. Remove these casts, to prevent silently introducing bugs when a variable's type might be changed in the future. No change in generated code. Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_aead.c | 7 +++---- drivers/crypto/ccree/cc_buffer_mgr.c | 7 +++---- drivers/crypto/ccree/cc_cipher.c | 10 +++++----- drivers/crypto/ccree/cc_hash.c | 24 ++++++++++++------------ 4 files changed, 23 insertions(+), 25 deletions(-) diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index 0d95bda4de70..7a62fb023d8d 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -448,8 +448,7 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *authkey, if (!key) return -ENOMEM; - key_dma_addr = dma_map_single(dev, (void *)key, keylen, - DMA_TO_DEVICE); + key_dma_addr = dma_map_single(dev, key, keylen, DMA_TO_DEVICE); if (dma_mapping_error(dev, key_dma_addr)) { dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n", key, keylen); @@ -1921,8 +1920,8 @@ static int cc_proc_aead(struct aead_request *req, } /* Setup request structure */ - cc_req.user_cb = (void *)cc_aead_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_aead_complete; + cc_req.user_arg = req; /* Setup request context */ areq_ctx->gen_ctx.op_type = direct; diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index 954f14bddf1d..1e335abd7440 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -118,7 +118,7 @@ void cc_copy_sg_portion(struct device *dev, u8 *dest, struct scatterlist *sg, u32 nents; nents = sg_nents_for_len(sg, end); - sg_copy_buffer(sg, nents, (void *)dest, (end - to_skip + 1), to_skip, + sg_copy_buffer(sg, nents, dest, (end - to_skip + 1), to_skip, (direct == CC_SG_TO_BUF)); } @@ -415,10 +415,9 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, /* Map IV buffer */ if (ivsize) { - dump_byte_array("iv", (u8 *)info, ivsize); + dump_byte_array("iv", info, ivsize); req_ctx->gen_ctx.iv_dma_addr = - dma_map_single(dev, (void *)info, - ivsize, DMA_BIDIRECTIONAL); + dma_map_single(dev, info, ivsize, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, req_ctx->gen_ctx.iv_dma_addr)) { dev_err(dev, "Mapping iv %u B at va=%pK for DMA failed\n", ivsize, info); diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c index 2679eb4a2418..3f93422c390c 100644 --- a/drivers/crypto/ccree/cc_cipher.c +++ b/drivers/crypto/ccree/cc_cipher.c @@ -184,7 
+184,7 @@ static int cc_cipher_init(struct crypto_tfm *tfm) ctx_p->user.key); /* Map key buffer */ - ctx_p->user.key_dma_addr = dma_map_single(dev, (void *)ctx_p->user.key, + ctx_p->user.key_dma_addr = dma_map_single(dev, ctx_p->user.key, max_key_buf_size, DMA_TO_DEVICE); if (dma_mapping_error(dev, ctx_p->user.key_dma_addr)) { @@ -284,7 +284,7 @@ static int cc_cipher_sethkey(struct crypto_skcipher *sktfm, const u8 *key, dev_dbg(dev, "Setting HW key in context @%p for %s. keylen=%u\n", ctx_p, crypto_tfm_alg_name(tfm), keylen); - dump_byte_array("key", (u8 *)key, keylen); + dump_byte_array("key", key, keylen); /* STAT_PHASE_0: Init and sanity checks */ @@ -387,7 +387,7 @@ static int cc_cipher_setkey(struct crypto_skcipher *sktfm, const u8 *key, dev_dbg(dev, "Setting key in context @%p for %s. keylen=%u\n", ctx_p, crypto_tfm_alg_name(tfm), keylen); - dump_byte_array("key", (u8 *)key, keylen); + dump_byte_array("key", key, keylen); /* STAT_PHASE_0: Init and sanity checks */ @@ -885,8 +885,8 @@ static int cc_cipher_process(struct skcipher_request *req, } /* Setup request structure */ - cc_req.user_cb = (void *)cc_cipher_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_cipher_complete; + cc_req.user_arg = req; /* Setup CPP operation details */ if (ctx_p->key_type == CC_POLICY_PROTECTED_KEY) { diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index 912e5ce5079d..36ce015716c3 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -752,7 +752,7 @@ static int cc_hash_setkey(struct crypto_ahash *ahash, const u8 *key, return -ENOMEM; ctx->key_params.key_dma_addr = - dma_map_single(dev, (void *)ctx->key_params.key, keylen, + dma_map_single(dev, ctx->key_params.key, keylen, DMA_TO_DEVICE); if (dma_mapping_error(dev, ctx->key_params.key_dma_addr)) { dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n", @@ -1067,8 +1067,8 @@ static int cc_alloc_ctx(struct cc_hash_ctx *ctx) ctx->key_params.keylen = 0; ctx->digest_buff_dma_addr = - dma_map_single(dev, (void *)ctx->digest_buff, - sizeof(ctx->digest_buff), DMA_BIDIRECTIONAL); + dma_map_single(dev, ctx->digest_buff, sizeof(ctx->digest_buff), + DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, ctx->digest_buff_dma_addr)) { dev_err(dev, "Mapping digest len %zu B at va=%pK for DMA failed\n", sizeof(ctx->digest_buff), ctx->digest_buff); @@ -1079,7 +1079,7 @@ static int cc_alloc_ctx(struct cc_hash_ctx *ctx) &ctx->digest_buff_dma_addr); ctx->opad_tmp_keys_dma_addr = - dma_map_single(dev, (void *)ctx->opad_tmp_keys_buff, + dma_map_single(dev, ctx->opad_tmp_keys_buff, sizeof(ctx->opad_tmp_keys_buff), DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, ctx->opad_tmp_keys_dma_addr)) { @@ -1196,8 +1196,8 @@ static int cc_mac_update(struct ahash_request *req) idx++; /* Setup request structure */ - cc_req.user_cb = (void *)cc_update_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_update_complete; + cc_req.user_arg = req; rc = cc_send_request(ctx->drvdata, &cc_req, desc, idx, &req->base); if (rc != -EINPROGRESS && rc != -EBUSY) { @@ -1254,8 +1254,8 @@ static int cc_mac_final(struct ahash_request *req) } /* Setup request structure */ - cc_req.user_cb = (void *)cc_hash_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_hash_complete; + cc_req.user_arg = req; if (state->xcbc_count && rem_cnt == 0) { /* Load key for ECB decryption */ @@ -1369,8 +1369,8 @@ static int cc_mac_finup(struct ahash_request *req) } /* Setup request structure */ - cc_req.user_cb = (void 
*)cc_hash_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_hash_complete; + cc_req.user_arg = req; if (ctx->hw_mode == DRV_CIPHER_XCBC_MAC) { key_len = CC_AES_128_BIT_KEY_SIZE; @@ -1448,8 +1448,8 @@ static int cc_mac_digest(struct ahash_request *req) } /* Setup request structure */ - cc_req.user_cb = (void *)cc_digest_complete; - cc_req.user_arg = (void *)req; + cc_req.user_cb = cc_digest_complete; + cc_req.user_arg = req; if (ctx->hw_mode == DRV_CIPHER_XCBC_MAC) { key_len = CC_AES_128_BIT_KEY_SIZE; From f08b58501c74d6ec0828b55a0d4e0b2e840c2b9e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:18:59 +0100 Subject: [PATCH 035/162] crypto: ccree - swap SHA384 and SHA512 larval hashes at build time Due to the way the hardware works, every double word in the SHA384 and SHA512 larval hashes must be swapped. Currently this is done at run time, during driver initialization. However, this swapping can easily be done at build time. Treating each double word as two words has the benefit of changing the larval hashes' types from u64[] to u32[], like for all other hashes, and allows dropping the casts and size doublings when calling cc_set_sram_desc(). Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_driver.c | 1 - drivers/crypto/ccree/cc_hash.c | 49 +++++++++++--------------------- drivers/crypto/ccree/cc_hash.h | 2 -- 3 files changed, 17 insertions(+), 35 deletions(-) diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c index 532bc95a8373..fc34d152f420 100644 --- a/drivers/crypto/ccree/cc_driver.c +++ b/drivers/crypto/ccree/cc_driver.c @@ -653,7 +653,6 @@ static struct platform_driver ccree_driver = { static int __init ccree_init(void) { - cc_hash_global_init(); cc_debugfs_global_init(); return platform_driver_register(&ccree_driver); diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index 36ce015716c3..c3146f550268 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -39,12 +39,19 @@ static const u32 cc_sha256_init[] = { SHA256_H3, SHA256_H2, SHA256_H1, SHA256_H0 }; static const u32 cc_digest_len_sha512_init[] = { 0x00000080, 0x00000000, 0x00000000, 0x00000000 }; -static u64 cc_sha384_init[] = { - SHA384_H7, SHA384_H6, SHA384_H5, SHA384_H4, - SHA384_H3, SHA384_H2, SHA384_H1, SHA384_H0 }; -static u64 cc_sha512_init[] = { - SHA512_H7, SHA512_H6, SHA512_H5, SHA512_H4, - SHA512_H3, SHA512_H2, SHA512_H1, SHA512_H0 }; + +/* + * Due to the way the HW works, every double word in the SHA384 and SHA512 + * larval hashes must be stored in hi/lo order + */ +#define hilo(x) upper_32_bits(x), lower_32_bits(x) +static const u32 cc_sha384_init[] = { + hilo(SHA384_H7), hilo(SHA384_H6), hilo(SHA384_H5), hilo(SHA384_H4), + hilo(SHA384_H3), hilo(SHA384_H2), hilo(SHA384_H1), hilo(SHA384_H0) }; +static const u32 cc_sha512_init[] = { + hilo(SHA512_H7), hilo(SHA512_H6), hilo(SHA512_H5), hilo(SHA512_H4), + hilo(SHA512_H3), hilo(SHA512_H2), hilo(SHA512_H1), hilo(SHA512_H0) }; + static const u32 cc_sm3_init[] = { SM3_IVH, SM3_IVG, SM3_IVF, SM3_IVE, SM3_IVD, SM3_IVC, SM3_IVB, SM3_IVA }; @@ -1942,8 +1949,8 @@ int cc_init_hash_sram(struct cc_drvdata *drvdata) } if (large_sha_supported) { - cc_set_sram_desc((u32 *)cc_sha384_init, sram_buff_ofs, - (ARRAY_SIZE(cc_sha384_init) * 2), larval_seq, + cc_set_sram_desc(cc_sha384_init, sram_buff_ofs, + ARRAY_SIZE(cc_sha384_init), larval_seq, &larval_seq_len); rc = send_request_init(drvdata, larval_seq, larval_seq_len); if 
(rc) @@ -1951,8 +1958,8 @@ int cc_init_hash_sram(struct cc_drvdata *drvdata) sram_buff_ofs += sizeof(cc_sha384_init); larval_seq_len = 0; - cc_set_sram_desc((u32 *)cc_sha512_init, sram_buff_ofs, - (ARRAY_SIZE(cc_sha512_init) * 2), larval_seq, + cc_set_sram_desc(cc_sha512_init, sram_buff_ofs, + ARRAY_SIZE(cc_sha512_init), larval_seq, &larval_seq_len); rc = send_request_init(drvdata, larval_seq, larval_seq_len); if (rc) @@ -1963,28 +1970,6 @@ init_digest_const_err: return rc; } -static void __init cc_swap_dwords(u32 *buf, unsigned long size) -{ - int i; - u32 tmp; - - for (i = 0; i < size; i += 2) { - tmp = buf[i]; - buf[i] = buf[i + 1]; - buf[i + 1] = tmp; - } -} - -/* - * Due to the way the HW works we need to swap every - * double word in the SHA384 and SHA512 larval hashes - */ -void __init cc_hash_global_init(void) -{ - cc_swap_dwords((u32 *)&cc_sha384_init, (ARRAY_SIZE(cc_sha384_init) * 2)); - cc_swap_dwords((u32 *)&cc_sha512_init, (ARRAY_SIZE(cc_sha512_init) * 2)); -} - int cc_hash_alloc(struct cc_drvdata *drvdata) { struct cc_hash_handle *hash_handle; diff --git a/drivers/crypto/ccree/cc_hash.h b/drivers/crypto/ccree/cc_hash.h index 0d6dc61484d7..3dbd0abefea0 100644 --- a/drivers/crypto/ccree/cc_hash.h +++ b/drivers/crypto/ccree/cc_hash.h @@ -104,6 +104,4 @@ cc_digest_len_addr(void *drvdata, u32 mode); */ cc_sram_addr_t cc_larval_digest_addr(void *drvdata, u32 mode); -void cc_hash_global_init(void); - #endif /*__CC_HASH_H__*/ From 08e8cb119f5ab434c398d865b16f9d7bdb76b0b0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:00 +0100 Subject: [PATCH 036/162] crypto: ccree - drop duplicated error message on SRAM exhaustion When no SRAM can be allocated, cc_sram_alloc() already prints an error message. Hence there is no need to duplicate this in all callers. 
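The convention at work here is to print a diagnostic once, in the layer that detects the failure, and let callers merely propagate an error code. A minimal sketch of that split, with an invented sram_alloc() standing in for the real cc_sram_alloc():

    #include <stdio.h>

    #define POOL_SIZE 4096
    #define ALLOC_FAIL -1L  /* plays the role of NULL_SRAM_ADDR */

    /* The allocator reports exhaustion itself, like cc_sram_alloc(). */
    static long sram_alloc(unsigned int size, unsigned int *free_offset)
    {
            if (size > POOL_SIZE - *free_offset) {
                    fprintf(stderr, "sram_alloc: pool exhausted\n");
                    return ALLOC_FAIL;
            }
            *free_offset += size;
            return *free_offset - size;
    }

    int main(void)
    {
            unsigned int free_offset = 0;
            long addr = sram_alloc(2 * POOL_SIZE, &free_offset);

            /* The caller propagates the failure without a second message. */
            if (addr == ALLOC_FAIL)
                    return 1;
            printf("allocated at offset %ld\n", addr);
            return 0;
    }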
Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_aead.c | 1 - drivers/crypto/ccree/cc_driver.c | 1 - drivers/crypto/ccree/cc_hash.c | 1 - 3 files changed, 3 deletions(-) diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index 7a62fb023d8d..db89144ce6c5 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -2682,7 +2682,6 @@ int cc_aead_alloc(struct cc_drvdata *drvdata) MAX_HMAC_DIGEST_SIZE); if (aead_handle->sram_workspace_addr == NULL_SRAM_ADDR) { - dev_err(dev, "SRAM pool exhausted\n"); rc = -ENOMEM; goto fail1; } diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c index fc34d152f420..599936a0b0a2 100644 --- a/drivers/crypto/ccree/cc_driver.c +++ b/drivers/crypto/ccree/cc_driver.c @@ -477,7 +477,6 @@ static int init_cc_resources(struct platform_device *plat_dev) new_drvdata->mlli_sram_addr = cc_sram_alloc(new_drvdata, MAX_MLLI_BUFF_SIZE); if (new_drvdata->mlli_sram_addr == NULL_SRAM_ADDR) { - dev_err(dev, "Failed to alloc MLLI Sram buffer\n"); rc = -ENOMEM; goto post_sram_mgr_err; } diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index c3146f550268..defeb35a16a6 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -2001,7 +2001,6 @@ int cc_hash_alloc(struct cc_drvdata *drvdata) sram_buff = cc_sram_alloc(drvdata, sram_size_to_alloc); if (sram_buff == NULL_SRAM_ADDR) { - dev_err(dev, "SRAM pool exhausted\n"); rc = -ENOMEM; goto fail; } From fc3b8c11aab728a536b6bb89d6760df9c8694d23 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:01 +0100 Subject: [PATCH 037/162] crypto: ccree - remove empty cc_sram_mgr_fini() cc_sram_mgr_fini() doesn't do anything, so it can just be removed. Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_driver.c | 7 ++----- drivers/crypto/ccree/cc_sram_mgr.c | 10 ---------- drivers/crypto/ccree/cc_sram_mgr.h | 7 ------- 3 files changed, 2 insertions(+), 22 deletions(-) diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c index 599936a0b0a2..f8aba89e4873 100644 --- a/drivers/crypto/ccree/cc_driver.c +++ b/drivers/crypto/ccree/cc_driver.c @@ -478,13 +478,13 @@ static int init_cc_resources(struct platform_device *plat_dev) cc_sram_alloc(new_drvdata, MAX_MLLI_BUFF_SIZE); if (new_drvdata->mlli_sram_addr == NULL_SRAM_ADDR) { rc = -ENOMEM; - goto post_sram_mgr_err; + goto post_fips_init_err; } rc = cc_req_mgr_init(new_drvdata); if (rc) { dev_err(dev, "cc_req_mgr_init failed\n"); - goto post_sram_mgr_err; + goto post_fips_init_err; } rc = cc_buffer_mgr_init(new_drvdata); @@ -538,8 +538,6 @@ post_buf_mgr_err: cc_buffer_mgr_fini(new_drvdata); post_req_mgr_err: cc_req_mgr_fini(new_drvdata); -post_sram_mgr_err: - cc_sram_mgr_fini(new_drvdata); post_fips_init_err: cc_fips_fini(new_drvdata); post_debugfs_err: @@ -568,7 +566,6 @@ static void cleanup_cc_resources(struct platform_device *plat_dev) cc_pm_fini(drvdata); cc_buffer_mgr_fini(drvdata); cc_req_mgr_fini(drvdata); - cc_sram_mgr_fini(drvdata); cc_fips_fini(drvdata); cc_debugfs_fini(drvdata); fini_cc_regs(drvdata); diff --git a/drivers/crypto/ccree/cc_sram_mgr.c b/drivers/crypto/ccree/cc_sram_mgr.c index 62c885e6e791..a3c13b37adce 100644 --- a/drivers/crypto/ccree/cc_sram_mgr.c +++ b/drivers/crypto/ccree/cc_sram_mgr.c @@ -12,16 +12,6 @@ struct cc_sram_ctx { cc_sram_addr_t sram_free_offset; }; -/** - * cc_sram_mgr_fini() - Cleanup SRAM pool. 
- * - * @drvdata: Associated device driver context - */ -void cc_sram_mgr_fini(struct cc_drvdata *drvdata) -{ - /* Nothing needed */ -} - /** * cc_sram_mgr_init() - Initializes SRAM pool. * The pool starts right at the beginning of SRAM. diff --git a/drivers/crypto/ccree/cc_sram_mgr.h b/drivers/crypto/ccree/cc_sram_mgr.h index 1d14de9ee8c3..971029b0e2a1 100644 --- a/drivers/crypto/ccree/cc_sram_mgr.h +++ b/drivers/crypto/ccree/cc_sram_mgr.h @@ -29,13 +29,6 @@ typedef u64 cc_sram_addr_t; */ int cc_sram_mgr_init(struct cc_drvdata *drvdata); -/*! - * Uninits SRAM pool. - * - * \param drvdata - */ -void cc_sram_mgr_fini(struct cc_drvdata *drvdata); - /*! * Allocated buffer from SRAM pool. * Note: Caller is responsible to free the LAST allocated buffer. From 2f272ef37c29425a6526fec976cf200dda950ee1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:02 +0100 Subject: [PATCH 038/162] crypto: ccree - clean up clock handling Use devm_clk_get_optional() instead of devm_clk_get() and explicit optional clock handling. As clk_prepare_enable() and clk_disable_unprepare() handle optional clocks fine, the cc_clk_on() and cc_clk_off() wrappers can be removed. While at it, use the new "%pe" format specifier to print error codes. Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_driver.c | 55 ++++++-------------------------- drivers/crypto/ccree/cc_driver.h | 2 -- drivers/crypto/ccree/cc_pm.c | 4 +-- 3 files changed, 11 insertions(+), 50 deletions(-) diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c index f8aba89e4873..a7b7f65939e2 100644 --- a/drivers/crypto/ccree/cc_driver.c +++ b/drivers/crypto/ccree/cc_driver.c @@ -302,22 +302,12 @@ static int init_cc_resources(struct platform_device *plat_dev) platform_set_drvdata(plat_dev, new_drvdata); new_drvdata->plat_dev = plat_dev; - clk = devm_clk_get(dev, NULL); - if (IS_ERR(clk)) - switch (PTR_ERR(clk)) { - /* Clock is optional so this might be fine */ - case -ENOENT: - break; - - /* Clock not available, let's try again soon */ - case -EPROBE_DEFER: - return -EPROBE_DEFER; - - default: - dev_err(dev, "Error getting clock: %ld\n", - PTR_ERR(clk)); - return PTR_ERR(clk); - } + clk = devm_clk_get_optional(dev, NULL); + if (IS_ERR(clk)) { + if (PTR_ERR(clk) != -EPROBE_DEFER) + dev_err(dev, "Error getting clock: %pe\n", clk); + return PTR_ERR(clk); + } new_drvdata->clk = clk; new_drvdata->coherent = of_dma_is_coherent(np); @@ -362,7 +352,7 @@ static int init_cc_resources(struct platform_device *plat_dev) return rc; } - rc = cc_clk_on(new_drvdata); + rc = clk_prepare_enable(new_drvdata->clk); if (rc) { dev_err(dev, "Failed to enable clock"); return rc; @@ -545,7 +535,7 @@ post_debugfs_err: post_regs_err: fini_cc_regs(new_drvdata); post_clk_err: - cc_clk_off(new_drvdata); + clk_disable_unprepare(new_drvdata->clk); return rc; } @@ -569,23 +559,7 @@ static void cleanup_cc_resources(struct platform_device *plat_dev) cc_fips_fini(drvdata); cc_debugfs_fini(drvdata); fini_cc_regs(drvdata); - cc_clk_off(drvdata); -} - -int cc_clk_on(struct cc_drvdata *drvdata) -{ - struct clk *clk = drvdata->clk; - int rc; - - if (IS_ERR(clk)) - /* Not all devices have a clock associated with CCREE */ - return 0; - - rc = clk_prepare_enable(clk); - if (rc) - return rc; - - return 0; + clk_disable_unprepare(drvdata->clk); } unsigned int cc_get_default_hash_len(struct cc_drvdata *drvdata) @@ -596,17 +570,6 @@ unsigned int cc_get_default_hash_len(struct cc_drvdata *drvdata) return HASH_LEN_SIZE_630; 
} -void cc_clk_off(struct cc_drvdata *drvdata) -{ - struct clk *clk = drvdata->clk; - - if (IS_ERR(clk)) - /* Not all devices have a clock associated with CCREE */ - return; - - clk_disable_unprepare(clk); -} - static int ccree_probe(struct platform_device *plat_dev) { int rc; diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h index c227718ba992..b983e721f2f7 100644 --- a/drivers/crypto/ccree/cc_driver.h +++ b/drivers/crypto/ccree/cc_driver.h @@ -212,8 +212,6 @@ static inline void dump_byte_array(const char *name, const u8 *the_array, bool cc_wait_for_reset_completion(struct cc_drvdata *drvdata); int init_cc_regs(struct cc_drvdata *drvdata, bool is_probe); void fini_cc_regs(struct cc_drvdata *drvdata); -int cc_clk_on(struct cc_drvdata *drvdata); -void cc_clk_off(struct cc_drvdata *drvdata); unsigned int cc_get_default_hash_len(struct cc_drvdata *drvdata); static inline void cc_iowrite(struct cc_drvdata *drvdata, u32 reg, u32 val) diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c index 24c368b866f6..81376173c3ec 100644 --- a/drivers/crypto/ccree/cc_pm.c +++ b/drivers/crypto/ccree/cc_pm.c @@ -26,7 +26,7 @@ int cc_pm_suspend(struct device *dev) dev_dbg(dev, "set HOST_POWER_DOWN_EN\n"); fini_cc_regs(drvdata); cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_ENABLE); - cc_clk_off(drvdata); + clk_disable_unprepare(drvdata->clk); return 0; } @@ -37,7 +37,7 @@ int cc_pm_resume(struct device *dev) dev_dbg(dev, "unset HOST_POWER_DOWN_EN\n"); /* Enables the device source clk */ - rc = cc_clk_on(drvdata); + rc = clk_prepare_enable(drvdata->clk); if (rc) { dev_err(dev, "failed getting clock back on. We're toast.\n"); return rc; From ba99b6f9bd59ff47ca5d1c9cfa4c6a6bd5499d76 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:03 +0100 Subject: [PATCH 039/162] crypto: ccree - make mlli_params.mlli_virt_addr void * mlli_params.mlli_virt_addr is just a buffer of memory, so void * is the natural type for it. This allows dropping a cast. No change in generated code.
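The change works because of a C language rule: void * converts implicitly to and from any object pointer type, so assignments need no cast. A small stand-alone illustration (the struct only mirrors the shape of mlli_params; nothing here is driver code):

    #include <stdio.h>
    #include <stdlib.h>

    struct mlli_params {
            void *mlli_virt_addr;   /* a u8 * here would force casts below */
    };

    int main(void)
    {
            struct mlli_params params;
            unsigned int *mlli_p;

            params.mlli_virt_addr = calloc(16, sizeof(*mlli_p));
            if (!params.mlli_virt_addr)
                    return 1;

            /* No cast needed: void * assigns to unsigned int * directly. */
            mlli_p = params.mlli_virt_addr;
            mlli_p[0] = 0x12345678;
            printf("first entry: %#x\n", mlli_p[0]);

            free(params.mlli_virt_addr);
            return 0;
    }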
Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_buffer_mgr.c | 2 +- drivers/crypto/ccree/cc_buffer_mgr.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index 1e335abd7440..abf08369f030 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -207,7 +207,7 @@ static int cc_generate_mlli(struct device *dev, struct buffer_array *sg_data, goto build_mlli_exit; } /* Point to start of MLLI */ - mlli_p = (u32 *)mlli_params->mlli_virt_addr; + mlli_p = mlli_params->mlli_virt_addr; /* go over all SG's and link it to one MLLI table */ for (i = 0; i < sg_data->num_of_buffers; i++) { union buffer_array_entry *entry = &sg_data->entry[i]; diff --git a/drivers/crypto/ccree/cc_buffer_mgr.h b/drivers/crypto/ccree/cc_buffer_mgr.h index 827b6cb1236e..250d634902ad 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.h +++ b/drivers/crypto/ccree/cc_buffer_mgr.h @@ -32,7 +32,7 @@ struct cc_mlli { struct mlli_params { struct dma_pool *curr_pool; - u8 *mlli_virt_addr; + void *mlli_virt_addr; dma_addr_t mlli_dma_addr; u32 mlli_len; }; From 5fabab0d36d333c1d182369c845f6c7546a4afb8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:04 +0100 Subject: [PATCH 040/162] crypto: ccree - use existing helpers to split 64-bit addresses Use the existing lower_32_bits() and upper_32_bits() macros instead of explicit casts and shifts to split a 64-bit address in its two 32-bit parts. Drop the superfluous cast to "u16", as the FIELD_PREP() macro already masks it to the specified field width. Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_hw_queue_defs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/ccree/cc_hw_queue_defs.h b/drivers/crypto/ccree/cc_hw_queue_defs.h index 9f4db9956e91..e3ec2a8084d8 100644 --- a/drivers/crypto/ccree/cc_hw_queue_defs.h +++ b/drivers/crypto/ccree/cc_hw_queue_defs.h @@ -239,9 +239,9 @@ static inline void set_din_type(struct cc_hw_desc *pdesc, enum cc_dma_mode dma_mode, dma_addr_t addr, u32 size, enum cc_axi_sec axi_sec) { - pdesc->word[0] = (u32)addr; + pdesc->word[0] = lower_32_bits(addr); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - pdesc->word[5] |= FIELD_PREP(WORD5_DIN_ADDR_HIGH, ((u16)(addr >> 32))); + pdesc->word[5] |= FIELD_PREP(WORD5_DIN_ADDR_HIGH, upper_32_bits(addr)); #endif pdesc->word[1] |= FIELD_PREP(WORD1_DIN_DMA_MODE, dma_mode) | FIELD_PREP(WORD1_DIN_SIZE, size) | @@ -336,9 +336,9 @@ static inline void set_dout_type(struct cc_hw_desc *pdesc, enum cc_dma_mode dma_mode, dma_addr_t addr, u32 size, enum cc_axi_sec axi_sec) { - pdesc->word[2] = (u32)addr; + pdesc->word[2] = lower_32_bits(addr); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - pdesc->word[5] |= FIELD_PREP(WORD5_DOUT_ADDR_HIGH, ((u16)(addr >> 32))); + pdesc->word[5] |= FIELD_PREP(WORD5_DOUT_ADDR_HIGH, upper_32_bits(addr)); #endif pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_DMA_MODE, dma_mode) | FIELD_PREP(WORD3_DOUT_SIZE, size) | From e431cc04381708660b321f6c5a5500ea577a903e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:05 +0100 Subject: [PATCH 041/162] crypto: ccree - defer larval_digest_addr init until needed While the larval digest addresses are not always used in cc_get_plain_hmac_key() and cc_hash_digest(), they are always calculated. Defer their calculations to the points where needed. 
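The same deferral in miniature: compute a value only on the branch that consumes it. A rough sketch with stand-ins (larval_digest_addr() here is not the real cc_larval_digest_addr(), just a counter-instrumented placeholder, and digest() only echoes the is_hmac split of cc_hash_digest()):

    #include <stdio.h>

    static int lookups;     /* counts how often the address is computed */

    /* Stand-in for cc_larval_digest_addr(). */
    static unsigned int larval_digest_addr(int mode)
    {
            lookups++;
            return 0x100 + mode * 0x40;
    }

    static void digest(int is_hmac, int mode)
    {
            if (is_hmac) {
                    /* The HMAC path loads a previously computed digest. */
                    printf("hmac: larval digest address not needed\n");
            } else {
                    /* Deferred: computed only on the path that uses it. */
                    unsigned int addr = larval_digest_addr(mode);

                    printf("plain hash: larval digest at %#x\n", addr);
            }
    }

    int main(void)
    {
            digest(1, 0);   /* no lookup performed */
            digest(0, 0);   /* exactly one lookup */
            printf("lookups: %d\n", lookups);
            return 0;
    }

On the is_hmac path the address is never computed, which is exactly the saving the patch is after.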
Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_aead.c | 4 +++- drivers/crypto/ccree/cc_hash.c | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index db89144ce6c5..d1e7e82b9a40 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -417,7 +417,7 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *authkey, dma_addr_t key_dma_addr = 0; struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); struct device *dev = drvdata_to_dev(ctx->drvdata); - u32 larval_addr = cc_larval_digest_addr(ctx->drvdata, ctx->auth_mode); + u32 larval_addr; struct cc_crypto_req cc_req = {}; unsigned int blocksize; unsigned int digestsize; @@ -459,6 +459,8 @@ static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *authkey, /* Load hash initial state */ hw_desc_init(&desc[idx]); set_cipher_mode(&desc[idx], hashmode); + larval_addr = cc_larval_digest_addr(ctx->drvdata, + ctx->auth_mode); set_din_sram(&desc[idx], larval_addr, digestsize); set_flow_mode(&desc[idx], S_DIN_to_HASH); set_setup_mode(&desc[idx], SETUP_LOAD_STATE0); diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index defeb35a16a6..cebbe2f08f60 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -429,8 +429,7 @@ static int cc_hash_digest(struct ahash_request *req) bool is_hmac = ctx->is_hmac; struct cc_crypto_req cc_req = {}; struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN]; - cc_sram_addr_t larval_digest_addr = - cc_larval_digest_addr(ctx->drvdata, ctx->hash_mode); + cc_sram_addr_t larval_digest_addr; int idx = 0; int rc = 0; gfp_t flags = cc_gfp_flags(&req->base); @@ -472,6 +471,8 @@ static int cc_hash_digest(struct ahash_request *req) set_din_type(&desc[idx], DMA_DLLI, state->digest_buff_dma_addr, ctx->inter_digestsize, NS_BIT); } else { + larval_digest_addr = cc_larval_digest_addr(ctx->drvdata, + ctx->hash_mode); set_din_sram(&desc[idx], larval_digest_addr, ctx->inter_digestsize); } From 37282f8d157169198e433d1f63e19a713267516c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:06 +0100 Subject: [PATCH 042/162] crypto: ccree - remove bogus paragraph about freeing SRAM The SRAM allocator does not support deallocating memory. Hence remove all references to freeing SRAM. Fix grammar while at it. Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_sram_mgr.c | 3 --- drivers/crypto/ccree/cc_sram_mgr.h | 5 +---- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/crypto/ccree/cc_sram_mgr.c b/drivers/crypto/ccree/cc_sram_mgr.c index a3c13b37adce..7d0e0db4f8df 100644 --- a/drivers/crypto/ccree/cc_sram_mgr.c +++ b/drivers/crypto/ccree/cc_sram_mgr.c @@ -50,9 +50,6 @@ int cc_sram_mgr_init(struct cc_drvdata *drvdata) /*! * Allocated buffer from SRAM pool. - * Note: Caller is responsible to free the LAST allocated buffer. - * This function does not taking care of any fragmentation may occur - * by the order of calls to alloc/free. * * \param drvdata * \param size The requested bytes to allocate diff --git a/drivers/crypto/ccree/cc_sram_mgr.h b/drivers/crypto/ccree/cc_sram_mgr.h index 971029b0e2a1..3b62dc3bd422 100644 --- a/drivers/crypto/ccree/cc_sram_mgr.h +++ b/drivers/crypto/ccree/cc_sram_mgr.h @@ -30,10 +30,7 @@ typedef u64 cc_sram_addr_t; int cc_sram_mgr_init(struct cc_drvdata *drvdata); /*! - * Allocated buffer from SRAM pool. 
- * Note: Caller is responsible to free the LAST allocated buffer. - * This function does not taking care of any fragmentation may occur - * by the order of calls to alloc/free. + * Allocate buffer from SRAM pool. * * \param drvdata * \param size The requested bytes to allocate From 1a895f1d5bceee0c6b66dcfdabcc9804006ca229 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:07 +0100 Subject: [PATCH 043/162] crypto: ccree - use u32 for SRAM addresses SRAM addresses are small integer offsets into local SRAM. Currently they are stored using a mixture of cc_sram_addr_t (u64), u32, and dma_addr_t types. Settle on u32, and remove the cc_sram_addr_t typedefs. This allows us to drop several casts. Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_aead.c | 6 +++--- drivers/crypto/ccree/cc_buffer_mgr.h | 2 +- drivers/crypto/ccree/cc_cipher.c | 10 +++++----- drivers/crypto/ccree/cc_driver.h | 2 +- drivers/crypto/ccree/cc_hash.c | 21 ++++++++++----------- drivers/crypto/ccree/cc_hash.h | 5 ++--- drivers/crypto/ccree/cc_hw_queue_defs.h | 10 ++++------ drivers/crypto/ccree/cc_sram_mgr.c | 24 ++++++++++-------------- drivers/crypto/ccree/cc_sram_mgr.h | 15 ++++----------- 9 files changed, 40 insertions(+), 55 deletions(-) diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index d1e7e82b9a40..78afa753b44f 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -26,7 +26,7 @@ #define MAX_NONCE_SIZE CTR_RFC3686_NONCE_SIZE struct cc_aead_handle { - cc_sram_addr_t sram_workspace_addr; + u32 sram_workspace_addr; struct list_head aead_list; }; @@ -797,7 +797,7 @@ static void cc_proc_authen_desc(struct aead_request *areq, * assoc. + iv + data -compact in one table * if assoclen is ZERO only IV perform */ - cc_sram_addr_t mlli_addr = areq_ctx->assoc.sram_addr; + u32 mlli_addr = areq_ctx->assoc.sram_addr; u32 mlli_nents = areq_ctx->assoc.mlli_nents; if (areq_ctx->is_single_pass) { @@ -1171,7 +1171,7 @@ static void cc_mlli_to_sram(struct aead_request *req, req_ctx->data_buff_type == CC_DMA_BUF_MLLI || !req_ctx->is_single_pass) && req_ctx->mlli_params.mlli_len) { dev_dbg(dev, "Copy-to-sram: mlli_dma=%08x, mlli_size=%u\n", - (unsigned int)ctx->drvdata->mlli_sram_addr, + ctx->drvdata->mlli_sram_addr, req_ctx->mlli_params.mlli_len); /* Copy MLLI table host-to-sram */ hw_desc_init(&desc[*seq_size]); diff --git a/drivers/crypto/ccree/cc_buffer_mgr.h b/drivers/crypto/ccree/cc_buffer_mgr.h index 250d634902ad..653441b6542e 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.h +++ b/drivers/crypto/ccree/cc_buffer_mgr.h @@ -24,7 +24,7 @@ enum cc_sg_cpy_direct { }; struct cc_mlli { - cc_sram_addr_t sram_addr; + u32 sram_addr; unsigned int mapped_nents; unsigned int nents; //sg nents unsigned int mlli_nents; //mlli nents might be different than the above diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c index 3f93422c390c..b020583dc35d 100644 --- a/drivers/crypto/ccree/cc_cipher.c +++ b/drivers/crypto/ccree/cc_cipher.c @@ -745,7 +745,7 @@ static void cc_setup_mlli_desc(struct crypto_tfm *tfm, dev_dbg(dev, " bypass params addr %pad length 0x%X addr 0x%08X\n", &req_ctx->mlli_params.mlli_dma_addr, req_ctx->mlli_params.mlli_len, - (unsigned int)ctx_p->drvdata->mlli_sram_addr); + ctx_p->drvdata->mlli_sram_addr); hw_desc_init(&desc[*seq_size]); set_din_type(&desc[*seq_size], DMA_DLLI, req_ctx->mlli_params.mlli_dma_addr, @@ -793,16 +793,16 @@ static void cc_setup_flow_desc(struct crypto_tfm
*tfm, req_ctx->in_mlli_nents, NS_BIT); if (req_ctx->out_nents == 0) { dev_dbg(dev, " din/dout params addr 0x%08X addr 0x%08X\n", - (unsigned int)ctx_p->drvdata->mlli_sram_addr, - (unsigned int)ctx_p->drvdata->mlli_sram_addr); + ctx_p->drvdata->mlli_sram_addr, + ctx_p->drvdata->mlli_sram_addr); set_dout_mlli(&desc[*seq_size], ctx_p->drvdata->mlli_sram_addr, req_ctx->in_mlli_nents, NS_BIT, (!last_desc ? 0 : 1)); } else { dev_dbg(dev, " din/dout params addr 0x%08X addr 0x%08X\n", - (unsigned int)ctx_p->drvdata->mlli_sram_addr, - (unsigned int)ctx_p->drvdata->mlli_sram_addr + + ctx_p->drvdata->mlli_sram_addr, + ctx_p->drvdata->mlli_sram_addr + (u32)LLI_ENTRY_BYTE_SIZE * req_ctx->in_nents); set_dout_mlli(&desc[*seq_size], (ctx_p->drvdata->mlli_sram_addr + diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h index b983e721f2f7..d7928b164a3b 100644 --- a/drivers/crypto/ccree/cc_driver.h +++ b/drivers/crypto/ccree/cc_driver.h @@ -139,7 +139,7 @@ struct cc_drvdata { int irq; struct completion hw_queue_avail; /* wait for HW queue availability */ struct platform_device *plat_dev; - cc_sram_addr_t mlli_sram_addr; + u32 mlli_sram_addr; void *buff_mgr_handle; void *cipher_handle; void *hash_handle; diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index cebbe2f08f60..0b179aafd484 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -20,8 +20,8 @@ #define CC_SM3_HASH_LEN_SIZE 8 struct cc_hash_handle { - cc_sram_addr_t digest_len_sram_addr; /* const value in SRAM*/ - cc_sram_addr_t larval_digest_sram_addr; /* const value in SRAM */ + u32 digest_len_sram_addr; /* const value in SRAM*/ + u32 larval_digest_sram_addr; /* const value in SRAM */ struct list_head hash_list; }; @@ -429,7 +429,7 @@ static int cc_hash_digest(struct ahash_request *req) bool is_hmac = ctx->is_hmac; struct cc_crypto_req cc_req = {}; struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN]; - cc_sram_addr_t larval_digest_addr; + u32 larval_digest_addr; int idx = 0; int rc = 0; gfp_t flags = cc_gfp_flags(&req->base); @@ -734,7 +734,7 @@ static int cc_hash_setkey(struct crypto_ahash *ahash, const u8 *key, int digestsize = 0; int i, idx = 0, rc = 0; struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN]; - cc_sram_addr_t larval_addr; + u32 larval_addr; struct device *dev; ctx = crypto_ahash_ctx(ahash); @@ -1868,7 +1868,7 @@ static struct cc_hash_alg *cc_alloc_hash_alg(struct cc_hash_template *template, int cc_init_hash_sram(struct cc_drvdata *drvdata) { struct cc_hash_handle *hash_handle = drvdata->hash_handle; - cc_sram_addr_t sram_buff_ofs = hash_handle->digest_len_sram_addr; + u32 sram_buff_ofs = hash_handle->digest_len_sram_addr; unsigned int larval_seq_len = 0; struct cc_hw_desc larval_seq[CC_DIGEST_SIZE_MAX / sizeof(u32)]; bool large_sha_supported = (drvdata->hw_rev >= CC_HW_REV_712); @@ -1974,7 +1974,7 @@ init_digest_const_err: int cc_hash_alloc(struct cc_drvdata *drvdata) { struct cc_hash_handle *hash_handle; - cc_sram_addr_t sram_buff; + u32 sram_buff; u32 sram_size_to_alloc; struct device *dev = drvdata_to_dev(drvdata); int rc = 0; @@ -2266,13 +2266,13 @@ static const void *cc_larval_digest(struct device *dev, u32 mode) * * \return u32 The address of the initial digest in SRAM */ -cc_sram_addr_t cc_larval_digest_addr(void *drvdata, u32 mode) +u32 cc_larval_digest_addr(void *drvdata, u32 mode) { struct cc_drvdata *_drvdata = (struct cc_drvdata *)drvdata; struct cc_hash_handle *hash_handle = _drvdata->hash_handle; struct device *dev = drvdata_to_dev(_drvdata); bool 
sm3_supported = (_drvdata->hw_rev >= CC_HW_REV_713); - cc_sram_addr_t addr; + u32 addr; switch (mode) { case DRV_HASH_NULL: @@ -2324,12 +2324,11 @@ cc_sram_addr_t cc_larval_digest_addr(void *drvdata, u32 mode) return hash_handle->larval_digest_sram_addr; } -cc_sram_addr_t -cc_digest_len_addr(void *drvdata, u32 mode) +u32 cc_digest_len_addr(void *drvdata, u32 mode) { struct cc_drvdata *_drvdata = (struct cc_drvdata *)drvdata; struct cc_hash_handle *hash_handle = _drvdata->hash_handle; - cc_sram_addr_t digest_len_addr = hash_handle->digest_len_sram_addr; + u32 digest_len_addr = hash_handle->digest_len_sram_addr; switch (mode) { case DRV_HASH_SHA1: diff --git a/drivers/crypto/ccree/cc_hash.h b/drivers/crypto/ccree/cc_hash.h index 3dbd0abefea0..d76ecae996ca 100644 --- a/drivers/crypto/ccree/cc_hash.h +++ b/drivers/crypto/ccree/cc_hash.h @@ -89,8 +89,7 @@ int cc_hash_free(struct cc_drvdata *drvdata); * * \return u32 returns the address of the initial digest length in SRAM */ -cc_sram_addr_t -cc_digest_len_addr(void *drvdata, u32 mode); +u32 cc_digest_len_addr(void *drvdata, u32 mode); /*! * Gets the address of the initial digest in SRAM @@ -102,6 +101,6 @@ cc_digest_len_addr(void *drvdata, u32 mode); * * \return u32 The address of the initial digest in SRAM */ -cc_sram_addr_t cc_larval_digest_addr(void *drvdata, u32 mode); +u32 cc_larval_digest_addr(void *drvdata, u32 mode); #endif /*__CC_HASH_H__*/ diff --git a/drivers/crypto/ccree/cc_hw_queue_defs.h b/drivers/crypto/ccree/cc_hw_queue_defs.h index e3ec2a8084d8..36786344c57a 100644 --- a/drivers/crypto/ccree/cc_hw_queue_defs.h +++ b/drivers/crypto/ccree/cc_hw_queue_defs.h @@ -290,10 +290,9 @@ static inline void set_cpp_crypto_key(struct cc_hw_desc *pdesc, u8 slot) * @addr: DIN address * @size Data size in bytes */ -static inline void set_din_sram(struct cc_hw_desc *pdesc, dma_addr_t addr, - u32 size) +static inline void set_din_sram(struct cc_hw_desc *pdesc, u32 addr, u32 size) { - pdesc->word[0] = (u32)addr; + pdesc->word[0] = addr; pdesc->word[1] |= FIELD_PREP(WORD1_DIN_SIZE, size) | FIELD_PREP(WORD1_DIN_DMA_MODE, DMA_SRAM); } @@ -373,9 +372,8 @@ static inline void set_dout_dlli(struct cc_hw_desc *pdesc, dma_addr_t addr, * @last_ind: The last indication bit * @axi_sec: AXI secure bit */ -static inline void set_dout_mlli(struct cc_hw_desc *pdesc, dma_addr_t addr, - u32 size, enum cc_axi_sec axi_sec, - bool last_ind) +static inline void set_dout_mlli(struct cc_hw_desc *pdesc, u32 addr, u32 size, + enum cc_axi_sec axi_sec, bool last_ind) { set_dout_type(pdesc, DMA_MLLI, addr, size, axi_sec); pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_LAST_IND, last_ind); diff --git a/drivers/crypto/ccree/cc_sram_mgr.c b/drivers/crypto/ccree/cc_sram_mgr.c index 7d0e0db4f8df..d46aad7c8140 100644 --- a/drivers/crypto/ccree/cc_sram_mgr.c +++ b/drivers/crypto/ccree/cc_sram_mgr.c @@ -9,7 +9,7 @@ * @sram_free_offset: the offset to the non-allocated area */ struct cc_sram_ctx { - cc_sram_addr_t sram_free_offset; + u32 sram_free_offset; }; /** @@ -22,23 +22,20 @@ struct cc_sram_ctx { int cc_sram_mgr_init(struct cc_drvdata *drvdata) { struct cc_sram_ctx *ctx; - dma_addr_t start = 0; + u32 start = 0; struct device *dev = drvdata_to_dev(drvdata); if (drvdata->hw_rev < CC_HW_REV_712) { /* Pool starts after ROM bytes */ - start = (dma_addr_t)cc_ioread(drvdata, - CC_REG(HOST_SEP_SRAM_THRESHOLD)); - + start = cc_ioread(drvdata, CC_REG(HOST_SEP_SRAM_THRESHOLD)); if ((start & 0x3) != 0) { - dev_err(dev, "Invalid SRAM offset %pad\n", &start); + dev_err(dev, "Invalid SRAM offset 
0x%x\n", start); return -EINVAL; } } /* Allocate "this" context */ ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); - if (!ctx) return -ENOMEM; @@ -54,11 +51,11 @@ int cc_sram_mgr_init(struct cc_drvdata *drvdata) * \param drvdata * \param size The requested bytes to allocate */ -cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size) +u32 cc_sram_alloc(struct cc_drvdata *drvdata, u32 size) { struct cc_sram_ctx *smgr_ctx = drvdata->sram_mgr_handle; struct device *dev = drvdata_to_dev(drvdata); - cc_sram_addr_t p; + u32 p; if ((size & 0x3)) { dev_err(dev, "Requested buffer size (%u) is not multiple of 4", @@ -66,14 +63,14 @@ cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size) return NULL_SRAM_ADDR; } if (size > (CC_CC_SRAM_SIZE - smgr_ctx->sram_free_offset)) { - dev_err(dev, "Not enough space to allocate %u B (at offset %llu)\n", + dev_err(dev, "Not enough space to allocate %u B (at offset %u)\n", size, smgr_ctx->sram_free_offset); return NULL_SRAM_ADDR; } p = smgr_ctx->sram_free_offset; smgr_ctx->sram_free_offset += size; - dev_dbg(dev, "Allocated %u B @ %u\n", size, (unsigned int)p); + dev_dbg(dev, "Allocated %u B @ %u\n", size, p); return p; } @@ -88,9 +85,8 @@ cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size) * @seq: A pointer to the given IN/OUT descriptor sequence * @seq_len: A pointer to the given IN/OUT sequence length */ -void cc_set_sram_desc(const u32 *src, cc_sram_addr_t dst, - unsigned int nelement, struct cc_hw_desc *seq, - unsigned int *seq_len) +void cc_set_sram_desc(const u32 *src, u32 dst, unsigned int nelement, + struct cc_hw_desc *seq, unsigned int *seq_len) { u32 i; unsigned int idx = *seq_len; diff --git a/drivers/crypto/ccree/cc_sram_mgr.h b/drivers/crypto/ccree/cc_sram_mgr.h index 3b62dc3bd422..04a857259d41 100644 --- a/drivers/crypto/ccree/cc_sram_mgr.h +++ b/drivers/crypto/ccree/cc_sram_mgr.h @@ -10,13 +10,7 @@ struct cc_drvdata; -/** - * Address (offset) within CC internal SRAM - */ - -typedef u64 cc_sram_addr_t; - -#define NULL_SRAM_ADDR ((cc_sram_addr_t)-1) +#define NULL_SRAM_ADDR ((u32)-1) /*! * Initializes SRAM pool. @@ -35,7 +29,7 @@ int cc_sram_mgr_init(struct cc_drvdata *drvdata); * \param drvdata * \param size The requested bytes to allocate */ -cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size); +u32 cc_sram_alloc(struct cc_drvdata *drvdata, u32 size); /** * cc_set_sram_desc() - Create const descriptors sequence to @@ -48,8 +42,7 @@ cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size); * @seq: A pointer to the given IN/OUT descriptor sequence * @seq_len: A pointer to the given IN/OUT sequence length */ -void cc_set_sram_desc(const u32 *src, cc_sram_addr_t dst, - unsigned int nelement, struct cc_hw_desc *seq, - unsigned int *seq_len); +void cc_set_sram_desc(const u32 *src, u32 dst, unsigned int nelement, + struct cc_hw_desc *seq, unsigned int *seq_len); #endif /*__CC_SRAM_MGR_H__*/ From 8c7849a30255cfd9a9ba412b1517e20a8572448b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:08 +0100 Subject: [PATCH 044/162] crypto: ccree - simplify Runtime PM handling Currently, a large part of the probe function runs before Runtime PM is enabled. As the driver manages the device's clock manually, this may work fine on some systems, but may break on platforms with a more complex power hierarchy. 
Fix this by moving the initialization of Runtime PM before the first register access (in cc_wait_for_reset_completion()), and putting the device to sleep only after the last access (in cc_set_ree_fips_status()). This allows us to remove the pm_on flag, which was used to track manually if Runtime PM had been enabled or not. Remove the cc_pm_{init,go,fini}() wrappers, as they are called only once and obscure the operation. Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_driver.c | 43 +++++++++++++++++++------------- drivers/crypto/ccree/cc_driver.h | 1 - drivers/crypto/ccree/cc_pm.c | 38 +++------------------------- drivers/crypto/ccree/cc_pm.h | 12 --------- 4 files changed, 29 insertions(+), 65 deletions(-) diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c index a7b7f65939e2..c7e44f212d4b 100644 --- a/drivers/crypto/ccree/cc_driver.c +++ b/drivers/crypto/ccree/cc_driver.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "cc_driver.h" #include "cc_request_mgr.h" @@ -360,6 +361,16 @@ static int init_cc_resources(struct platform_device *plat_dev) new_drvdata->sec_disabled = cc_sec_disable; + pm_runtime_set_autosuspend_delay(dev, CC_SUSPEND_TIMEOUT); + pm_runtime_use_autosuspend(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + rc = pm_runtime_get_sync(dev); + if (rc < 0) { + dev_err(dev, "pm_runtime_get_sync() failed: %d\n", rc); + goto post_pm_err; + } + /* wait for Crytpcell reset completion */ if (!cc_wait_for_reset_completion(new_drvdata)) { dev_err(dev, "Cryptocell reset not completed"); @@ -372,7 +383,7 @@ static int init_cc_resources(struct platform_device *plat_dev) dev_err(dev, "Invalid CC signature: SIGNATURE=0x%08X != expected=0x%08X\n", val, hw_rev->sig); rc = -EINVAL; - goto post_clk_err; + goto post_pm_err; } sig_cidr = val; hw_rev_pidr = cc_ioread(new_drvdata, new_drvdata->ver_offset); @@ -383,7 +394,7 @@ dev_err(dev, "Invalid CC PIDR: PIDR0124=0x%08X != expected=0x%08X\n", val, hw_rev->pidr_0124); rc = -EINVAL; - goto post_clk_err; + goto post_pm_err; } hw_rev_pidr = val; @@ -392,7 +403,7 @@ dev_err(dev, "Invalid CC CIDR: CIDR0123=0x%08X != expected=0x%08X\n", val, hw_rev->cidr_0123); rc = -EINVAL; - goto post_clk_err; + goto post_pm_err; } sig_cidr = val; @@ -411,7 +422,7 @@ default: dev_err(dev, "Unsupported engines configuration.\n"); rc = -EINVAL; - goto post_clk_err; + goto post_pm_err; } /* Check security disable state */ @@ -437,14 +448,14 @@ new_drvdata); if (rc) { dev_err(dev, "Could not register to interrupt %d\n", irq); - goto post_clk_err; + goto post_pm_err; } dev_dbg(dev, "Registered to IRQ: %d\n", irq); rc = init_cc_regs(new_drvdata, true); if (rc) { dev_err(dev, "init_cc_regs failed\n"); - goto post_clk_err; + goto post_pm_err; } rc = cc_debugfs_init(new_drvdata); @@ -483,12 +494,6 @@ goto post_req_mgr_err; } - rc = cc_pm_init(new_drvdata); - if (rc) { - dev_err(dev, "cc_pm_init failed\n"); - goto post_buf_mgr_err; - } - /* Allocate crypto algs */ rc = cc_cipher_alloc(new_drvdata); if (rc) { @@ -509,15 +514,13 @@ goto post_hash_err; } - /* All set, we can allow autosuspend */ -
cc_pm_go(new_drvdata); - /* If we got here and FIPS mode is enabled * it means all FIPS test passed, so let TEE * know we're good. */ cc_set_ree_fips_status(new_drvdata, true); + pm_runtime_put(dev); return 0; post_hash_err: @@ -534,7 +537,10 @@ post_debugfs_err: cc_debugfs_fini(new_drvdata); post_regs_err: fini_cc_regs(new_drvdata); -post_clk_err: +post_pm_err: + pm_runtime_put_noidle(dev); + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); clk_disable_unprepare(new_drvdata->clk); return rc; } @@ -547,18 +553,21 @@ void fini_cc_regs(struct cc_drvdata *drvdata) static void cleanup_cc_resources(struct platform_device *plat_dev) { + struct device *dev = &plat_dev->dev; struct cc_drvdata *drvdata = (struct cc_drvdata *)platform_get_drvdata(plat_dev); cc_aead_free(drvdata); cc_hash_free(drvdata); cc_cipher_free(drvdata); - cc_pm_fini(drvdata); cc_buffer_mgr_fini(drvdata); cc_req_mgr_fini(drvdata); cc_fips_fini(drvdata); cc_debugfs_fini(drvdata); fini_cc_regs(drvdata); + pm_runtime_put_noidle(dev); + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); clk_disable_unprepare(drvdata->clk); } diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h index d7928b164a3b..9e94a29d84ae 100644 --- a/drivers/crypto/ccree/cc_driver.h +++ b/drivers/crypto/ccree/cc_driver.h @@ -158,7 +158,6 @@ struct cc_drvdata { int std_bodies; bool sec_disabled; u32 comp_mask; - bool pm_on; }; struct cc_crypto_alg { diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c index 81376173c3ec..f7729fc1ee59 100644 --- a/drivers/crypto/ccree/cc_pm.c +++ b/drivers/crypto/ccree/cc_pm.c @@ -64,23 +64,15 @@ int cc_pm_resume(struct device *dev) int cc_pm_get(struct device *dev) { - int rc = 0; - struct cc_drvdata *drvdata = dev_get_drvdata(dev); - - if (drvdata->pm_on) - rc = pm_runtime_get_sync(dev); + int rc = pm_runtime_get_sync(dev); return (rc == 1 ? 0 : rc); } void cc_pm_put_suspend(struct device *dev) { - struct cc_drvdata *drvdata = dev_get_drvdata(dev); - - if (drvdata->pm_on) { - pm_runtime_mark_last_busy(dev); - pm_runtime_put_autosuspend(dev); - } + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); } bool cc_pm_is_dev_suspended(struct device *dev) @@ -88,27 +80,3 @@ bool cc_pm_is_dev_suspended(struct device *dev) /* check device state using runtime api */ return pm_runtime_suspended(dev); } - -int cc_pm_init(struct cc_drvdata *drvdata) -{ - struct device *dev = drvdata_to_dev(drvdata); - - /* must be before the enabling to avoid redundant suspending */ - pm_runtime_set_autosuspend_delay(dev, CC_SUSPEND_TIMEOUT); - pm_runtime_use_autosuspend(dev); - /* set us as active - note we won't do PM ops until cc_pm_go()! 
*/ - return pm_runtime_set_active(dev); -} - -/* enable the PM module*/ -void cc_pm_go(struct cc_drvdata *drvdata) -{ - pm_runtime_enable(drvdata_to_dev(drvdata)); - drvdata->pm_on = true; -} - -void cc_pm_fini(struct cc_drvdata *drvdata) -{ - pm_runtime_disable(drvdata_to_dev(drvdata)); - drvdata->pm_on = false; -} diff --git a/drivers/crypto/ccree/cc_pm.h b/drivers/crypto/ccree/cc_pm.h index 80a18e11cae4..2dcf53fa108e 100644 --- a/drivers/crypto/ccree/cc_pm.h +++ b/drivers/crypto/ccree/cc_pm.h @@ -15,9 +15,6 @@ extern const struct dev_pm_ops ccree_pm; -int cc_pm_init(struct cc_drvdata *drvdata); -void cc_pm_go(struct cc_drvdata *drvdata); -void cc_pm_fini(struct cc_drvdata *drvdata); int cc_pm_suspend(struct device *dev); int cc_pm_resume(struct device *dev); int cc_pm_get(struct device *dev); @@ -26,15 +23,6 @@ bool cc_pm_is_dev_suspended(struct device *dev); #else -static inline int cc_pm_init(struct cc_drvdata *drvdata) -{ - return 0; -} - -static inline void cc_pm_go(struct cc_drvdata *drvdata) {} - -static inline void cc_pm_fini(struct cc_drvdata *drvdata) {} - static inline int cc_pm_get(struct device *dev) { return 0; From 1b940e35a5176fd22de6000af723475c07660759 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:09 +0100 Subject: [PATCH 045/162] crypto: ccree - use of_device_get_match_data() If the driver is probed, it means a match was found in arm_ccree_dev_of_match[]. Hence we can just use the of_device_get_match_data() helper. Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_driver.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c index c7e44f212d4b..e365ede32cc0 100644 --- a/drivers/crypto/ccree/cc_driver.c +++ b/drivers/crypto/ccree/cc_driver.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "cc_driver.h" @@ -270,7 +271,6 @@ static int init_cc_resources(struct platform_device *plat_dev) u32 val, hw_rev_pidr, sig_cidr; u64 dma_mask; const struct cc_hw_data *hw_rev; - const struct of_device_id *dev_id; struct clk *clk; int irq; int rc = 0; @@ -279,11 +279,7 @@ if (!new_drvdata) return -ENOMEM; - dev_id = of_match_node(arm_ccree_dev_of_match, np); - if (!dev_id) - return -ENODEV; - - hw_rev = (struct cc_hw_data *)dev_id->data; + hw_rev = of_device_get_match_data(dev); new_drvdata->hw_rev_name = hw_rev->name; new_drvdata->hw_rev = hw_rev->rev; new_drvdata->std_bodies = hw_rev->std_bodies; From 8f76b35211bd78437d37de897d29a0b83e370c08 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:10 +0100 Subject: [PATCH 046/162] crypto: ccree - remove cc_pm_is_dev_suspended() wrapper If CONFIG_PM=y, cc_pm_is_dev_suspended() is just a wrapper around pm_runtime_suspended(). If CONFIG_PM=n, cc_pm_is_dev_suspended() is a dummy that behaves exactly the same as the dummy for pm_runtime_suspended(). Hence remove cc_pm_is_dev_suspended(), and call pm_runtime_suspended() directly.
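For background on the helper adopted in patch 045 above: of_device_get_match_data() returns the .data pointer of the of_device_id entry that matched at probe time, so the probe function no longer has to call of_match_node() and check for failure itself. A minimal sketch of the pattern, under assumed names (my_hw_data, my_of_match and my_probe are hypothetical, not from this driver):

#include <linux/of_device.h>
#include <linux/platform_device.h>

struct my_hw_data {
	const char *name;
	unsigned int rev;
};

static const struct my_hw_data my_713_data = { .name = "713", .rev = 713 };

static const struct of_device_id my_of_match[] = {
	{ .compatible = "vendor,my-dev-713", .data = &my_713_data },
	{ /* sentinel */ }
};

static int my_probe(struct platform_device *pdev)
{
	/* Returns the .data of the matched entry, or NULL if none matched. */
	const struct my_hw_data *hw = of_device_get_match_data(&pdev->dev);

	if (!hw)
		return -ENODEV;
	dev_info(&pdev->dev, "probed %s (rev %u)\n", hw->name, hw->rev);
	return 0;
}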
Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_driver.c | 2 +- drivers/crypto/ccree/cc_pm.c | 6 ------ drivers/crypto/ccree/cc_pm.h | 7 ------- 3 files changed, 1 insertion(+), 14 deletions(-) diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c index e365ede32cc0..02442596310c 100644 --- a/drivers/crypto/ccree/cc_driver.c +++ b/drivers/crypto/ccree/cc_driver.c @@ -136,7 +136,7 @@ static irqreturn_t cc_isr(int irq, void *dev_id) /* STAT_OP_TYPE_GENERIC STAT_PHASE_0: Interrupt */ /* if driver suspended return, probably shared interrupt */ - if (cc_pm_is_dev_suspended(dev)) + if (pm_runtime_suspended(dev)) return IRQ_NONE; /* read the interrupt status */ diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c index f7729fc1ee59..3b4927c41a17 100644 --- a/drivers/crypto/ccree/cc_pm.c +++ b/drivers/crypto/ccree/cc_pm.c @@ -74,9 +74,3 @@ void cc_pm_put_suspend(struct device *dev) pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); } - -bool cc_pm_is_dev_suspended(struct device *dev) -{ - /* check device state using runtime api */ - return pm_runtime_suspended(dev); -} diff --git a/drivers/crypto/ccree/cc_pm.h b/drivers/crypto/ccree/cc_pm.h index 2dcf53fa108e..1fe1fc827f62 100644 --- a/drivers/crypto/ccree/cc_pm.h +++ b/drivers/crypto/ccree/cc_pm.h @@ -19,7 +19,6 @@ int cc_pm_suspend(struct device *dev); int cc_pm_resume(struct device *dev); int cc_pm_get(struct device *dev); void cc_pm_put_suspend(struct device *dev); -bool cc_pm_is_dev_suspended(struct device *dev); #else @@ -30,12 +29,6 @@ static inline int cc_pm_get(struct device *dev) static inline void cc_pm_put_suspend(struct device *dev) {} -static inline bool cc_pm_is_dev_suspended(struct device *dev) -{ - /* if PM not supported device is never suspend */ - return false; -} - #endif #endif /*__POWER_MGR_H__*/ From f33d807c9d8847c634ddcb033178f93347428e7b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:11 +0100 Subject: [PATCH 047/162] crypto: ccree - make cc_pm_{suspend,resume}() static cc_pm_suspend() and cc_pm_resume() are not used outside drivers/crypto/ccree/cc_pm.c. 
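The resulting shape of cc_pm.c, sketched with generic names (my_runtime_suspend/resume and my_pm are hypothetical; SET_RUNTIME_PM_OPS() is the real kernel macro): once the dev_pm_ops table defined in the same file is the only user of the callbacks, they can become static and drop out of the header.

#include <linux/pm.h>
#include <linux/pm_runtime.h>

static int my_runtime_suspend(struct device *dev)
{
	/* quiesce the device, gate its clock, ... */
	return 0;
}

static int my_runtime_resume(struct device *dev)
{
	/* ungate the clock, reprogram registers, ... */
	return 0;
}

/* The only symbol the rest of the driver needs to see. */
const struct dev_pm_ops my_pm = {
	SET_RUNTIME_PM_OPS(my_runtime_suspend, my_runtime_resume, NULL)
};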
Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_pm.c | 12 ++++++------ drivers/crypto/ccree/cc_pm.h | 2 -- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c index 3b4927c41a17..d39e1664fc7e 100644 --- a/drivers/crypto/ccree/cc_pm.c +++ b/drivers/crypto/ccree/cc_pm.c @@ -15,11 +15,7 @@ #define POWER_DOWN_ENABLE 0x01 #define POWER_DOWN_DISABLE 0x00 -const struct dev_pm_ops ccree_pm = { - SET_RUNTIME_PM_OPS(cc_pm_suspend, cc_pm_resume, NULL) -}; - -int cc_pm_suspend(struct device *dev) +static int cc_pm_suspend(struct device *dev) { struct cc_drvdata *drvdata = dev_get_drvdata(dev); @@ -30,7 +26,7 @@ int cc_pm_suspend(struct device *dev) return 0; } -int cc_pm_resume(struct device *dev) +static int cc_pm_resume(struct device *dev) { int rc; struct cc_drvdata *drvdata = dev_get_drvdata(dev); @@ -62,6 +58,10 @@ int cc_pm_resume(struct device *dev) return 0; } +const struct dev_pm_ops ccree_pm = { + SET_RUNTIME_PM_OPS(cc_pm_suspend, cc_pm_resume, NULL) +}; + int cc_pm_get(struct device *dev) { int rc = pm_runtime_get_sync(dev); diff --git a/drivers/crypto/ccree/cc_pm.h b/drivers/crypto/ccree/cc_pm.h index 1fe1fc827f62..50cac33de118 100644 --- a/drivers/crypto/ccree/cc_pm.h +++ b/drivers/crypto/ccree/cc_pm.h @@ -15,8 +15,6 @@ extern const struct dev_pm_ops ccree_pm; -int cc_pm_suspend(struct device *dev); -int cc_pm_resume(struct device *dev); int cc_pm_get(struct device *dev); void cc_pm_put_suspend(struct device *dev); From f1b19dff53cfc43b5171f0c74fe56e0724ff7b01 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:12 +0100 Subject: [PATCH 048/162] crypto: ccree - remove struct cc_sram_ctx The cc_sram_ctx structure contains only a single member, and only one instance exists. Simplify the code and reduce memory consumption by moving this member to struct cc_drvdata. Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_driver.h | 2 +- drivers/crypto/ccree/cc_sram_mgr.c | 27 +++++---------------------- 2 files changed, 6 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h index 9e94a29d84ae..7e483c228582 100644 --- a/drivers/crypto/ccree/cc_driver.h +++ b/drivers/crypto/ccree/cc_driver.h @@ -146,7 +146,7 @@ struct cc_drvdata { void *aead_handle; void *request_mgr_handle; void *fips_handle; - void *sram_mgr_handle; + u32 sram_free_offset; /* offset to non-allocated area in SRAM */ void *debugfs; struct clk *clk; bool coherent; diff --git a/drivers/crypto/ccree/cc_sram_mgr.c b/drivers/crypto/ccree/cc_sram_mgr.c index d46aad7c8140..38f36cbc05b3 100644 --- a/drivers/crypto/ccree/cc_sram_mgr.c +++ b/drivers/crypto/ccree/cc_sram_mgr.c @@ -4,14 +4,6 @@ #include "cc_driver.h" #include "cc_sram_mgr.h" -/** - * struct cc_sram_ctx -Internal RAM context manager - * @sram_free_offset: the offset to the non-allocated area - */ -struct cc_sram_ctx { - u32 sram_free_offset; -}; - /** * cc_sram_mgr_init() - Initializes SRAM pool. * The pool starts right at the beginning of SRAM. 
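The refactoring applied here and in the next few patches, as a before/after sketch (the *_before/*_after type names are illustrative):

#include <linux/types.h>

/* Before: a one-member context, allocated separately and reached
 * through an opaque handle in the driver data.
 */
struct sram_ctx_before {
	u32 sram_free_offset;
};

struct drvdata_before {
	void *sram_mgr_handle;	/* points to a devm_kzalloc'd sram_ctx_before */
};

/* After: the single member lives directly in the driver data,
 * saving one allocation and one pointer dereference per access.
 */
struct drvdata_after {
	u32 sram_free_offset;
};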
@@ -21,7 +13,6 @@ struct cc_sram_ctx { */ int cc_sram_mgr_init(struct cc_drvdata *drvdata) { - struct cc_sram_ctx *ctx; u32 start = 0; struct device *dev = drvdata_to_dev(drvdata); @@ -34,14 +25,7 @@ int cc_sram_mgr_init(struct cc_drvdata *drvdata) } } - /* Allocate "this" context */ - ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; - - ctx->sram_free_offset = start; - drvdata->sram_mgr_handle = ctx; - + drvdata->sram_free_offset = start; return 0; } @@ -53,7 +37,6 @@ int cc_sram_mgr_init(struct cc_drvdata *drvdata) */ u32 cc_sram_alloc(struct cc_drvdata *drvdata, u32 size) { - struct cc_sram_ctx *smgr_ctx = drvdata->sram_mgr_handle; struct device *dev = drvdata_to_dev(drvdata); u32 p; @@ -62,14 +45,14 @@ u32 cc_sram_alloc(struct cc_drvdata *drvdata, u32 size) size); return NULL_SRAM_ADDR; } - if (size > (CC_CC_SRAM_SIZE - smgr_ctx->sram_free_offset)) { + if (size > (CC_CC_SRAM_SIZE - drvdata->sram_free_offset)) { dev_err(dev, "Not enough space to allocate %u B (at offset %u)\n", - size, smgr_ctx->sram_free_offset); + size, drvdata->sram_free_offset); return NULL_SRAM_ADDR; } - p = smgr_ctx->sram_free_offset; - smgr_ctx->sram_free_offset += size; + p = drvdata->sram_free_offset; + drvdata->sram_free_offset += size; dev_dbg(dev, "Allocated %u B @ %u\n", size, p); return p; } From ec8f3a5519653c41b23f22ef99fb2c5289f2263a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:13 +0100 Subject: [PATCH 049/162] crypto: ccree - remove struct cc_debugfs_ctx The cc_debugfs_ctx structure contains only a single member, and only one instance exists. Simplify the code and reduce memory consumption by moving this member to struct cc_drvdata. Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_debugfs.c | 27 +++++++-------------------- drivers/crypto/ccree/cc_driver.h | 2 +- 2 files changed, 8 insertions(+), 21 deletions(-) diff --git a/drivers/crypto/ccree/cc_debugfs.c b/drivers/crypto/ccree/cc_debugfs.c index 35f3a2137502..c454afce7781 100644 --- a/drivers/crypto/ccree/cc_debugfs.c +++ b/drivers/crypto/ccree/cc_debugfs.c @@ -8,10 +8,6 @@ #include "cc_crypto_ctx.h" #include "cc_debugfs.h" -struct cc_debugfs_ctx { - struct dentry *dir; -}; - #define CC_DEBUG_REG(_X) { \ .name = __stringify(_X),\ .offset = CC_REG(_X) \ @@ -67,13 +63,8 @@ void __exit cc_debugfs_global_fini(void) int cc_debugfs_init(struct cc_drvdata *drvdata) { struct device *dev = drvdata_to_dev(drvdata); - struct cc_debugfs_ctx *ctx; struct debugfs_regset32 *regset, *verset; - ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; - regset = devm_kzalloc(dev, sizeof(*regset), GFP_KERNEL); if (!regset) return -ENOMEM; @@ -83,15 +74,16 @@ int cc_debugfs_init(struct cc_drvdata *drvdata) regset->base = drvdata->cc_base; regset->dev = dev; - ctx->dir = debugfs_create_dir(drvdata->plat_dev->name, cc_debugfs_dir); + drvdata->dir = debugfs_create_dir(drvdata->plat_dev->name, + cc_debugfs_dir); - debugfs_create_regset32("regs", 0400, ctx->dir, regset); - debugfs_create_bool("coherent", 0400, ctx->dir, &drvdata->coherent); + debugfs_create_regset32("regs", 0400, drvdata->dir, regset); + debugfs_create_bool("coherent", 0400, drvdata->dir, &drvdata->coherent); verset = devm_kzalloc(dev, sizeof(*verset), GFP_KERNEL); /* Failing here is not important enough to fail the module load */ if (!verset) - goto out; + return 0; if (drvdata->hw_rev <= CC_HW_REV_712) { ver_sig_regs[0].offset = drvdata->sig_offset; @@ -105,16 +97,11 @@ int 
cc_debugfs_init(struct cc_drvdata *drvdata) verset->base = drvdata->cc_base; verset->dev = dev; - debugfs_create_regset32("version", 0400, ctx->dir, verset); - -out: - drvdata->debugfs = ctx; + debugfs_create_regset32("version", 0400, drvdata->dir, verset); return 0; } void cc_debugfs_fini(struct cc_drvdata *drvdata) { - struct cc_debugfs_ctx *ctx = (struct cc_debugfs_ctx *)drvdata->debugfs; - - debugfs_remove_recursive(ctx->dir); + debugfs_remove_recursive(drvdata->dir); } diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h index 7e483c228582..7360b403655c 100644 --- a/drivers/crypto/ccree/cc_driver.h +++ b/drivers/crypto/ccree/cc_driver.h @@ -147,7 +147,7 @@ struct cc_drvdata { void *request_mgr_handle; void *fips_handle; u32 sram_free_offset; /* offset to non-allocated area in SRAM */ - void *debugfs; + struct dentry *dir; /* for debugfs */ struct clk *clk; bool coherent; char *hw_rev_name; From 040187a0faa6592a9499ec9a1162efd77e3611c2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:14 +0100 Subject: [PATCH 050/162] crypto: ccree - remove struct buff_mgr_handle The buff_mgr_handle structure contains only a single member, and only one instance exists. Simplify the code and reduce memory consumption by moving this member to struct cc_drvdata. Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_buffer_mgr.c | 41 ++++++---------------------- drivers/crypto/ccree/cc_driver.h | 2 +- 2 files changed, 9 insertions(+), 34 deletions(-) diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index abf08369f030..f2e782d2be15 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -19,10 +19,6 @@ enum dma_buffer_type { DMA_BUFF_TYPE = 2, }; -struct buff_mgr_handle { - struct dma_pool *mlli_buffs_pool; -}; - union buffer_array_entry { struct scatterlist *sgl; dma_addr_t buffer_dma; @@ -402,7 +398,6 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, { struct cipher_req_ctx *req_ctx = (struct cipher_req_ctx *)ctx; struct mlli_params *mlli_params = &req_ctx->mlli_params; - struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle; struct device *dev = drvdata_to_dev(drvdata); struct buffer_array sg_data; u32 dummy = 0; @@ -466,7 +461,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, } if (req_ctx->dma_buf_type == CC_DMA_BUF_MLLI) { - mlli_params->curr_pool = buff_mgr->mlli_buffs_pool; + mlli_params->curr_pool = drvdata->mlli_buffs_pool; rc = cc_generate_mlli(dev, &sg_data, mlli_params, flags); if (rc) goto cipher_exit; @@ -1007,7 +1002,6 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) struct device *dev = drvdata_to_dev(drvdata); struct buffer_array sg_data; unsigned int authsize = areq_ctx->req_authsize; - struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle; int rc = 0; struct crypto_aead *tfm = crypto_aead_reqtfm(req); bool is_gcm4543 = areq_ctx->is_gcm4543; @@ -1180,7 +1174,7 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) */ if (areq_ctx->assoc_buff_type == CC_DMA_BUF_MLLI || areq_ctx->data_buff_type == CC_DMA_BUF_MLLI) { - mlli_params->curr_pool = buff_mgr->mlli_buffs_pool; + mlli_params->curr_pool = drvdata->mlli_buffs_pool; rc = cc_generate_mlli(dev, &sg_data, mlli_params, flags); if (rc) goto aead_map_failure; @@ -1208,7 +1202,6 @@ int cc_map_hash_request_final(struct cc_drvdata *drvdata, void *ctx, u32 *curr_buff_cnt 
= cc_hash_buf_cnt(areq_ctx); struct mlli_params *mlli_params = &areq_ctx->mlli_params; struct buffer_array sg_data; - struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle; int rc = 0; u32 dummy = 0; u32 mapped_nents = 0; @@ -1255,7 +1248,7 @@ int cc_map_hash_request_final(struct cc_drvdata *drvdata, void *ctx, /*build mlli */ if (areq_ctx->data_dma_buf_type == CC_DMA_BUF_MLLI) { - mlli_params->curr_pool = buff_mgr->mlli_buffs_pool; + mlli_params->curr_pool = drvdata->mlli_buffs_pool; /* add the src data to the sg_data */ cc_add_sg_entry(dev, &sg_data, areq_ctx->in_nents, src, nbytes, 0, true, &areq_ctx->mlli_nents); @@ -1293,7 +1286,6 @@ int cc_map_hash_request_update(struct cc_drvdata *drvdata, void *ctx, unsigned int update_data_len; u32 total_in_len = nbytes + *curr_buff_cnt; struct buffer_array sg_data; - struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle; unsigned int swap_index = 0; int rc = 0; u32 dummy = 0; @@ -1368,7 +1360,7 @@ int cc_map_hash_request_update(struct cc_drvdata *drvdata, void *ctx, } if (areq_ctx->data_dma_buf_type == CC_DMA_BUF_MLLI) { - mlli_params->curr_pool = buff_mgr->mlli_buffs_pool; + mlli_params->curr_pool = drvdata->mlli_buffs_pool; /* add the src data to the sg_data */ cc_add_sg_entry(dev, &sg_data, areq_ctx->in_nents, src, (update_data_len - *curr_buff_cnt), 0, true, @@ -1435,39 +1427,22 @@ void cc_unmap_hash_request(struct device *dev, void *ctx, int cc_buffer_mgr_init(struct cc_drvdata *drvdata) { - struct buff_mgr_handle *buff_mgr_handle; struct device *dev = drvdata_to_dev(drvdata); - buff_mgr_handle = kmalloc(sizeof(*buff_mgr_handle), GFP_KERNEL); - if (!buff_mgr_handle) - return -ENOMEM; - - drvdata->buff_mgr_handle = buff_mgr_handle; - - buff_mgr_handle->mlli_buffs_pool = + drvdata->mlli_buffs_pool = dma_pool_create("dx_single_mlli_tables", dev, MAX_NUM_OF_TOTAL_MLLI_ENTRIES * LLI_ENTRY_BYTE_SIZE, MLLI_TABLE_MIN_ALIGNMENT, 0); - if (!buff_mgr_handle->mlli_buffs_pool) - goto error; + if (!drvdata->mlli_buffs_pool) + return -ENOMEM; return 0; - -error: - cc_buffer_mgr_fini(drvdata); - return -ENOMEM; } int cc_buffer_mgr_fini(struct cc_drvdata *drvdata) { - struct buff_mgr_handle *buff_mgr_handle = drvdata->buff_mgr_handle; - - if (buff_mgr_handle) { - dma_pool_destroy(buff_mgr_handle->mlli_buffs_pool); - kfree(drvdata->buff_mgr_handle); - drvdata->buff_mgr_handle = NULL; - } + dma_pool_destroy(drvdata->mlli_buffs_pool); return 0; } diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h index 7360b403655c..4895f124d2b8 100644 --- a/drivers/crypto/ccree/cc_driver.h +++ b/drivers/crypto/ccree/cc_driver.h @@ -140,7 +140,7 @@ struct cc_drvdata { struct completion hw_queue_avail; /* wait for HW queue availability */ struct platform_device *plat_dev; u32 mlli_sram_addr; - void *buff_mgr_handle; + struct dma_pool *mlli_buffs_pool; void *cipher_handle; void *hash_handle; void *aead_handle; From c23d7997559dcceaea8483398ef6cc400b22bfb1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:15 +0100 Subject: [PATCH 051/162] crypto: ccree - remove struct cc_cipher_handle The cc_cipher_handle structure contains only a single member, and only one instance exists. Simplify the code and reduce memory consumption by moving this member to struct cc_drvdata. 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_cipher.c | 31 +++++++------------------------ drivers/crypto/ccree/cc_driver.h | 2 +- 2 files changed, 8 insertions(+), 25 deletions(-) diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c index b020583dc35d..d219b5b5d7b4 100644 --- a/drivers/crypto/ccree/cc_cipher.c +++ b/drivers/crypto/ccree/cc_cipher.c @@ -20,10 +20,6 @@ #define template_skcipher template_u.skcipher -struct cc_cipher_handle { - struct list_head alg_list; -}; - struct cc_user_key_info { u8 *key; dma_addr_t key_dma_addr; @@ -1661,36 +1657,24 @@ static struct cc_crypto_alg *cc_create_alg(const struct cc_alg_template *tmpl, int cc_cipher_free(struct cc_drvdata *drvdata) { struct cc_crypto_alg *t_alg, *n; - struct cc_cipher_handle *cipher_handle = drvdata->cipher_handle; - if (cipher_handle) { - /* Remove registered algs */ - list_for_each_entry_safe(t_alg, n, &cipher_handle->alg_list, - entry) { - crypto_unregister_skcipher(&t_alg->skcipher_alg); - list_del(&t_alg->entry); - kfree(t_alg); - } - kfree(cipher_handle); - drvdata->cipher_handle = NULL; + /* Remove registered algs */ + list_for_each_entry_safe(t_alg, n, &drvdata->alg_list, entry) { + crypto_unregister_skcipher(&t_alg->skcipher_alg); + list_del(&t_alg->entry); + kfree(t_alg); } return 0; } int cc_cipher_alloc(struct cc_drvdata *drvdata) { - struct cc_cipher_handle *cipher_handle; struct cc_crypto_alg *t_alg; struct device *dev = drvdata_to_dev(drvdata); int rc = -ENOMEM; int alg; - cipher_handle = kmalloc(sizeof(*cipher_handle), GFP_KERNEL); - if (!cipher_handle) - return -ENOMEM; - - INIT_LIST_HEAD(&cipher_handle->alg_list); - drvdata->cipher_handle = cipher_handle; + INIT_LIST_HEAD(&drvdata->alg_list); /* Linux crypto */ dev_dbg(dev, "Number of algorithms = %zu\n", @@ -1722,8 +1706,7 @@ int cc_cipher_alloc(struct cc_drvdata *drvdata) kfree(t_alg); goto fail0; } else { - list_add_tail(&t_alg->entry, - &cipher_handle->alg_list); + list_add_tail(&t_alg->entry, &drvdata->alg_list); dev_dbg(dev, "Registered %s\n", t_alg->skcipher_alg.base.cra_driver_name); } diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h index 4895f124d2b8..4790eb5cb8bd 100644 --- a/drivers/crypto/ccree/cc_driver.h +++ b/drivers/crypto/ccree/cc_driver.h @@ -141,7 +141,7 @@ struct cc_drvdata { struct platform_device *plat_dev; u32 mlli_sram_addr; struct dma_pool *mlli_buffs_pool; - void *cipher_handle; + struct list_head alg_list; void *hash_handle; void *aead_handle; void *request_mgr_handle; From 08884316bb15e53a71116a8c4f75b6331de38373 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:16 +0100 Subject: [PATCH 052/162] crypto: ccree - extract cc_init_copy_sram() Extract the copy to SRAM of the initial values for a hash algorithm into its own function, to improve readability and ease maintenance. 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_hash.c | 91 ++++++++++++++-------------------- 1 file changed, 36 insertions(+), 55 deletions(-) diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index 0b179aafd484..f3adc1ab0e01 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -1865,104 +1865,85 @@ static struct cc_hash_alg *cc_alloc_hash_alg(struct cc_hash_template *template, return t_crypto_alg; } +static int cc_init_copy_sram(struct cc_drvdata *drvdata, const u32 *data, + unsigned int size, u32 *sram_buff_ofs) +{ + struct cc_hw_desc larval_seq[CC_DIGEST_SIZE_MAX / sizeof(u32)]; + unsigned int larval_seq_len = 0; + int rc; + + cc_set_sram_desc(data, *sram_buff_ofs, size / sizeof(*data), + larval_seq, &larval_seq_len); + rc = send_request_init(drvdata, larval_seq, larval_seq_len); + if (rc) + return rc; + + *sram_buff_ofs += size; + return 0; +} + int cc_init_hash_sram(struct cc_drvdata *drvdata) { struct cc_hash_handle *hash_handle = drvdata->hash_handle; u32 sram_buff_ofs = hash_handle->digest_len_sram_addr; - unsigned int larval_seq_len = 0; - struct cc_hw_desc larval_seq[CC_DIGEST_SIZE_MAX / sizeof(u32)]; bool large_sha_supported = (drvdata->hw_rev >= CC_HW_REV_712); bool sm3_supported = (drvdata->hw_rev >= CC_HW_REV_713); int rc = 0; /* Copy-to-sram digest-len */ - cc_set_sram_desc(cc_digest_len_init, sram_buff_ofs, - ARRAY_SIZE(cc_digest_len_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_digest_len_init, + sizeof(cc_digest_len_init), &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_digest_len_init); - larval_seq_len = 0; - if (large_sha_supported) { /* Copy-to-sram digest-len for sha384/512 */ - cc_set_sram_desc(cc_digest_len_sha512_init, sram_buff_ofs, - ARRAY_SIZE(cc_digest_len_sha512_init), - larval_seq, &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_digest_len_sha512_init, + sizeof(cc_digest_len_sha512_init), + &sram_buff_ofs); if (rc) goto init_digest_const_err; - - sram_buff_ofs += sizeof(cc_digest_len_sha512_init); - larval_seq_len = 0; } /* The initial digests offset */ hash_handle->larval_digest_sram_addr = sram_buff_ofs; /* Copy-to-sram initial SHA* digests */ - cc_set_sram_desc(cc_md5_init, sram_buff_ofs, ARRAY_SIZE(cc_md5_init), - larval_seq, &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_md5_init, sizeof(cc_md5_init), + &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_md5_init); - larval_seq_len = 0; - cc_set_sram_desc(cc_sha1_init, sram_buff_ofs, - ARRAY_SIZE(cc_sha1_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sha1_init, sizeof(cc_sha1_init), + &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_sha1_init); - larval_seq_len = 0; - cc_set_sram_desc(cc_sha224_init, sram_buff_ofs, - ARRAY_SIZE(cc_sha224_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sha224_init, sizeof(cc_sha224_init), + &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_sha224_init); - larval_seq_len = 0; - cc_set_sram_desc(cc_sha256_init, sram_buff_ofs, - 
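One detail of the call-site change: the helper takes a byte size and derives the element count itself, which is why sizeof(x) replaces the ARRAY_SIZE(x) arguments. The relation behind this, in a small illustrative sketch (demo_init and sum_words() are hypothetical):

#include <linux/kernel.h>	/* ARRAY_SIZE() */
#include <linux/types.h>

static const u32 demo_init[4] = { 1, 2, 3, 4 };

/*
 * For any array a: ARRAY_SIZE(a) == sizeof(a) / sizeof(a[0]), so a helper
 * called as sum_words(demo_init, sizeof(demo_init)) can recover the
 * element count with byte_size / sizeof(*data), as cc_init_copy_sram()
 * does above.
 */
static u32 sum_words(const u32 *data, unsigned int byte_size)
{
	unsigned int i, nwords = byte_size / sizeof(*data);
	u32 sum = 0;

	for (i = 0; i < nwords; i++)
		sum += data[i];
	return sum;
}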
ARRAY_SIZE(cc_sha256_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sha256_init, sizeof(cc_sha256_init), + &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_sha256_init); - larval_seq_len = 0; if (sm3_supported) { - cc_set_sram_desc(cc_sm3_init, sram_buff_ofs, - ARRAY_SIZE(cc_sm3_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sm3_init, + sizeof(cc_sm3_init), &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_sm3_init); - larval_seq_len = 0; } if (large_sha_supported) { - cc_set_sram_desc(cc_sha384_init, sram_buff_ofs, - ARRAY_SIZE(cc_sha384_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sha384_init, + sizeof(cc_sha384_init), &sram_buff_ofs); if (rc) goto init_digest_const_err; - sram_buff_ofs += sizeof(cc_sha384_init); - larval_seq_len = 0; - cc_set_sram_desc(cc_sha512_init, sram_buff_ofs, - ARRAY_SIZE(cc_sha512_init), larval_seq, - &larval_seq_len); - rc = send_request_init(drvdata, larval_seq, larval_seq_len); + rc = cc_init_copy_sram(drvdata, cc_sha512_init, + sizeof(cc_sha512_init), &sram_buff_ofs); if (rc) goto init_digest_const_err; } From 92816ab6b6c3eec0f493f079b311b745cc32319d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:17 +0100 Subject: [PATCH 053/162] crypto: ccree - remove bogus kerneldoc markers Normal comments should start with "/*". "/**" is reserved for kerneldoc. Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_aead.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index 78afa753b44f..be26aaf75e1f 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -1223,7 +1223,7 @@ static void cc_hmac_authenc(struct aead_request *req, struct cc_hw_desc desc[], req_ctx->is_single_pass); if (req_ctx->is_single_pass) { - /** + /* * Single-pass flow */ cc_set_hmac_desc(req, desc, seq_size); @@ -1235,7 +1235,7 @@ return; } - /** + /* * Double-pass flow * Fallback for unsupported single-pass modes, * i.e. using assoc. data of non-word-multiple @@ -1276,7 +1276,7 @@ cc_xcbc_authenc(struct aead_request *req, struct cc_hw_desc desc[], req_ctx->is_single_pass); if (req_ctx->is_single_pass) { - /** + /* * Single-pass flow */ cc_set_xcbc_desc(req, desc, seq_size); @@ -1287,7 +1287,7 @@ return; } - /** + /* * Double-pass flow * Fallback for unsupported single-pass modes, * i.e. using assoc.
data of non-word-multiple From 67b74a4683893d40f8d2c143720f9954185f94e6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:18 +0100 Subject: [PATCH 054/162] crypto: ccree - improve kerneldoc in cc_hw_queue_defs.h Miscellaneous improvements: - Start comment blocks with "/**" to enable kerneldoc, - Fix descriptor type of set_dout_mlli(), - Fix copied config parameter of set_cipher_config1(), - Fix copied config parameter of set_bytes_swap(), - Add missing function names to kerneldoc headers, - Add missing parameter descriptions, - Remove descriptions for nonexistent parameters, - Add missing colons, - Remove references to obsolete camelcase parameter names, - Sort according to actual parameter order, - Fix grammar and spelling. Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_hw_queue_defs.h | 237 ++++++++++++------------ 1 file changed, 119 insertions(+), 118 deletions(-) diff --git a/drivers/crypto/ccree/cc_hw_queue_defs.h b/drivers/crypto/ccree/cc_hw_queue_defs.h index 36786344c57a..25ef28669989 100644 --- a/drivers/crypto/ccree/cc_hw_queue_defs.h +++ b/drivers/crypto/ccree/cc_hw_queue_defs.h @@ -207,31 +207,32 @@ enum cc_hash_cipher_pad { /* Descriptor packing macros */ /*****************************/ -/* - * Init a HW descriptor struct - * @pdesc: pointer HW descriptor struct +/** + * hw_desc_init() - Init a HW descriptor struct + * @pdesc: pointer to HW descriptor struct */ static inline void hw_desc_init(struct cc_hw_desc *pdesc) { memset(pdesc, 0, sizeof(struct cc_hw_desc)); } -/* - * Indicates the end of current HW descriptors flow and release the HW engines. +/** + * set_queue_last_ind_bit() - Indicate the end of current HW descriptors flow + * and release the HW engines. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_queue_last_ind_bit(struct cc_hw_desc *pdesc) { pdesc->word[3] |= FIELD_PREP(WORD3_QUEUE_LAST_IND, 1); } -/* - * Set the DIN field of a HW descriptors +/** + * set_din_type() - Set the DIN field of a HW descriptor * - * @pdesc: pointer HW descriptor struct - * @dma_mode: dmaMode The DMA mode: NO_DMA, SRAM, DLLI, MLLI, CONSTANT - * @addr: dinAdr DIN address + * @pdesc: Pointer to HW descriptor struct + * @dma_mode: The DMA mode: NO_DMA, SRAM, DLLI, MLLI, CONSTANT + * @addr: DIN address * @size: Data size in bytes * @axi_sec: AXI secure bit */ @@ -248,11 +249,11 @@ static inline void set_din_type(struct cc_hw_desc *pdesc, FIELD_PREP(WORD1_NS_BIT, axi_sec); } -/* - * Set the DIN field of a HW descriptors to NO DMA mode. +/** + * set_din_no_dma() - Set the DIN field of a HW descriptor to NO DMA mode. * Used for NOP descriptor, register patches and other special modes. 
* - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @addr: DIN address * @size: Data size in bytes */ @@ -262,14 +263,11 @@ static inline void set_din_no_dma(struct cc_hw_desc *pdesc, u32 addr, u32 size) pdesc->word[1] |= FIELD_PREP(WORD1_DIN_SIZE, size); } -/* - * Setup the special CPP descriptor +/** + * set_cpp_crypto_key() - Setup the special CPP descriptor * - * @pdesc: pointer HW descriptor struct - * @alg: cipher used (AES / SM4) - * @mode: mode used (CTR or CBC) - * @slot: slot number - * @ksize: key size + * @pdesc: Pointer to HW descriptor struct + * @slot: Slot number */ static inline void set_cpp_crypto_key(struct cc_hw_desc *pdesc, u8 slot) { @@ -281,14 +279,14 @@ static inline void set_cpp_crypto_key(struct cc_hw_desc *pdesc, u8 slot) pdesc->word[4] |= FIELD_PREP(WORD4_SETUP_OPERATION, slot); } -/* - * Set the DIN field of a HW descriptors to SRAM mode. +/** + * set_din_sram() - Set the DIN field of a HW descriptor to SRAM mode. * Note: No need to check SRAM alignment since host requests do not use SRAM and - * adaptor will enforce alignment check. + * the adaptor will enforce alignment checks. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @addr: DIN address - * @size Data size in bytes + * @size: Data size in bytes */ static inline void set_din_sram(struct cc_hw_desc *pdesc, u32 addr, u32 size) { @@ -297,10 +295,10 @@ static inline void set_din_sram(struct cc_hw_desc *pdesc, u32 addr, u32 size) FIELD_PREP(WORD1_DIN_DMA_MODE, DMA_SRAM); } -/* - * Set the DIN field of a HW descriptors to CONST mode +/** + * set_din_const() - Set the DIN field of a HW descriptor to CONST mode * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @val: DIN const value * @size: Data size in bytes */ @@ -312,20 +310,20 @@ static inline void set_din_const(struct cc_hw_desc *pdesc, u32 val, u32 size) FIELD_PREP(WORD1_DIN_SIZE, size); } -/* - * Set the DIN not last input data indicator +/** + * set_din_not_last_indication() - Set the DIN not last input data indicator * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_din_not_last_indication(struct cc_hw_desc *pdesc) { pdesc->word[1] |= FIELD_PREP(WORD1_NOT_LAST, 1); } -/* - * Set the DOUT field of a HW descriptors +/** + * set_dout_type() - Set the DOUT field of a HW descriptor * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @dma_mode: The DMA mode: NO_DMA, SRAM, DLLI, MLLI, CONSTANT * @addr: DOUT address * @size: Data size in bytes @@ -344,15 +342,15 @@ static inline void set_dout_type(struct cc_hw_desc *pdesc, FIELD_PREP(WORD3_NS_BIT, axi_sec); } -/* - * Set the DOUT field of a HW descriptors to DLLI type +/** + * set_dout_dlli() - Set the DOUT field of a HW descriptor to DLLI type * The LAST INDICATION is provided by the user * - * @pdesc pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @addr: DOUT address * @size: Data size in bytes - * @last_ind: The last indication bit * @axi_sec: AXI secure bit + * @last_ind: The last indication bit */ static inline void set_dout_dlli(struct cc_hw_desc *pdesc, dma_addr_t addr, u32 size, enum cc_axi_sec axi_sec, @@ -362,15 +360,15 @@ static inline void set_dout_dlli(struct cc_hw_desc *pdesc, dma_addr_t addr, pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_LAST_IND, last_ind); } -/* - * Set the DOUT field of a HW descriptors to DLLI type +/** + * set_dout_mlli() - Set the DOUT 
field of a HW descriptor to MLLI type * The LAST INDICATION is provided by the user * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @addr: DOUT address * @size: Data size in bytes - * @last_ind: The last indication bit * @axi_sec: AXI secure bit + * @last_ind: The last indication bit */ static inline void set_dout_mlli(struct cc_hw_desc *pdesc, u32 addr, u32 size, enum cc_axi_sec axi_sec, bool last_ind) @@ -379,11 +377,11 @@ static inline void set_dout_mlli(struct cc_hw_desc *pdesc, u32 addr, u32 size, pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_LAST_IND, last_ind); } -/* - * Set the DOUT field of a HW descriptors to NO DMA mode. +/** + * set_dout_no_dma() - Set the DOUT field of a HW descriptor to NO DMA mode. * Used for NOP descriptor, register patches and other special modes. * - * @pdesc: pointer HW descriptor struct + * @pdesc: pointer to HW descriptor struct * @addr: DOUT address * @size: Data size in bytes * @write_enable: Enables a write operation to a register @@ -396,54 +394,55 @@ static inline void set_dout_no_dma(struct cc_hw_desc *pdesc, u32 addr, FIELD_PREP(WORD3_DOUT_LAST_IND, write_enable); } -/* - * Set the word for the XOR operation. +/** + * set_xor_val() - Set the word for the XOR operation. * - * @pdesc: pointer HW descriptor struct - * @val: xor data value + * @pdesc: Pointer to HW descriptor struct + * @val: XOR data value */ static inline void set_xor_val(struct cc_hw_desc *pdesc, u32 val) { pdesc->word[2] = val; } -/* - * Sets the XOR indicator bit in the descriptor +/** + * set_xor_active() - Set the XOR indicator bit in the descriptor * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_xor_active(struct cc_hw_desc *pdesc) { pdesc->word[3] |= FIELD_PREP(WORD3_HASH_XOR_BIT, 1); } -/* - * Select the AES engine instead of HASH engine when setting up combined mode - * with AES XCBC MAC +/** + * set_aes_not_hash_mode() - Select the AES engine instead of HASH engine when + * setting up combined mode with AES XCBC MAC * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_aes_not_hash_mode(struct cc_hw_desc *pdesc) { pdesc->word[4] |= FIELD_PREP(WORD4_AES_SEL_N_HASH, 1); } -/* - * Set aes xor crypto key, this in some secenrios select SM3 engine +/** + * set_aes_xor_crypto_key() - Set aes xor crypto key, which in some scenarios + * selects the SM3 engine * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_aes_xor_crypto_key(struct cc_hw_desc *pdesc) { pdesc->word[4] |= FIELD_PREP(WORD4_AES_XOR_CRYPTO_KEY, 1); } -/* - * Set the DOUT field of a HW descriptors to SRAM mode +/** + * set_dout_sram() - Set the DOUT field of a HW descriptor to SRAM mode * Note: No need to check SRAM alignment since host requests do not use SRAM and - * adaptor will enforce alignment check. + * the adaptor will enforce alignment checks. 
* - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @addr: DOUT address * @size: Data size in bytes */ @@ -454,32 +453,34 @@ static inline void set_dout_sram(struct cc_hw_desc *pdesc, u32 addr, u32 size) FIELD_PREP(WORD3_DOUT_SIZE, size); } -/* - * Sets the data unit size for XEX mode in data_out_addr[15:0] +/** + * set_xex_data_unit_size() - Set the data unit size for XEX mode in + * data_out_addr[15:0] * - * @pdesc: pDesc pointer HW descriptor struct - * @size: data unit size for XEX mode + * @pdesc: Pointer to HW descriptor struct + * @size: Data unit size for XEX mode */ static inline void set_xex_data_unit_size(struct cc_hw_desc *pdesc, u32 size) { pdesc->word[2] = size; } -/* - * Set the number of rounds for Multi2 in data_out_addr[15:0] +/** + * set_multi2_num_rounds() - Set the number of rounds for Multi2 in + * data_out_addr[15:0] * - * @pdesc: pointer HW descriptor struct - * @num: number of rounds for Multi2 + * @pdesc: Pointer to HW descriptor struct + * @num: Number of rounds for Multi2 */ static inline void set_multi2_num_rounds(struct cc_hw_desc *pdesc, u32 num) { pdesc->word[2] = num; } -/* - * Set the flow mode. +/** + * set_flow_mode() - Set the flow mode. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @mode: Any one of the modes defined in [CC7x-DESC] */ static inline void set_flow_mode(struct cc_hw_desc *pdesc, @@ -488,22 +489,22 @@ static inline void set_flow_mode(struct cc_hw_desc *pdesc, pdesc->word[4] |= FIELD_PREP(WORD4_DATA_FLOW_MODE, mode); } -/* - * Set the cipher mode. +/** + * set_cipher_mode() - Set the cipher mode. * - * @pdesc: pointer HW descriptor struct - * @mode: Any one of the modes defined in [CC7x-DESC] + * @pdesc: Pointer to HW descriptor struct + * @mode: Any one of the modes defined in [CC7x-DESC] */ static inline void set_cipher_mode(struct cc_hw_desc *pdesc, int mode) { pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_MODE, mode); } -/* - * Set the cipher mode for hash algorithms. +/** + * set_hash_cipher_mode() - Set the cipher mode for hash algorithms. * - * @pdesc: pointer HW descriptor struct - * @cipher_mode: Any one of the modes defined in [CC7x-DESC] + * @pdesc: Pointer to HW descriptor struct + * @cipher_mode: Any one of the modes defined in [CC7x-DESC] * @hash_mode: specifies which hash is being handled */ static inline void set_hash_cipher_mode(struct cc_hw_desc *pdesc, @@ -515,10 +516,10 @@ static inline void set_hash_cipher_mode(struct cc_hw_desc *pdesc, set_aes_xor_crypto_key(pdesc); } -/* - * Set the cipher configuration fields. +/** + * set_cipher_config0() - Set the cipher configuration fields. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @mode: Any one of the modes defined in [CC7x-DESC] */ static inline void set_cipher_config0(struct cc_hw_desc *pdesc, int mode) @@ -526,11 +527,11 @@ static inline void set_cipher_config0(struct cc_hw_desc *pdesc, int mode) pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_CONF0, mode); } -/* - * Set the cipher configuration fields. +/** + * set_cipher_config1() - Set the cipher configuration fields. 
* - * @pdesc: pointer HW descriptor struct - * @config: Any one of the modes defined in [CC7x-DESC] + * @pdesc: Pointer to HW descriptor struct + * @config: Padding mode */ static inline void set_cipher_config1(struct cc_hw_desc *pdesc, enum cc_hash_conf_pad config) @@ -538,10 +539,10 @@ static inline void set_cipher_config1(struct cc_hw_desc *pdesc, pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_CONF1, config); } -/* - * Set HW key configuration fields. +/** + * set_hw_crypto_key() - Set HW key configuration fields. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @hw_key: The HW key slot asdefined in enum cc_hw_crypto_key */ static inline void set_hw_crypto_key(struct cc_hw_desc *pdesc, @@ -553,64 +554,64 @@ static inline void set_hw_crypto_key(struct cc_hw_desc *pdesc, (hw_key >> HW_KEY_SHIFT_CIPHER_CFG2)); } -/* - * Set byte order of all setup-finalize descriptors. +/** + * set_bytes_swap() - Set byte order of all setup-finalize descriptors. * - * @pdesc: pointer HW descriptor struct - * @config: Any one of the modes defined in [CC7x-DESC] + * @pdesc: Pointer to HW descriptor struct + * @config: True to enable byte swapping */ static inline void set_bytes_swap(struct cc_hw_desc *pdesc, bool config) { pdesc->word[4] |= FIELD_PREP(WORD4_BYTES_SWAP, config); } -/* - * Set CMAC_SIZE0 mode. +/** + * set_cmac_size0_mode() - Set CMAC_SIZE0 mode. * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct */ static inline void set_cmac_size0_mode(struct cc_hw_desc *pdesc) { pdesc->word[4] |= FIELD_PREP(WORD4_CMAC_SIZE0, 1); } -/* - * Set key size descriptor field. +/** + * set_key_size() - Set key size descriptor field. * - * @pdesc: pointer HW descriptor struct - * @size: key size in bytes (NOT size code) + * @pdesc: Pointer to HW descriptor struct + * @size: Key size in bytes (NOT size code) */ static inline void set_key_size(struct cc_hw_desc *pdesc, u32 size) { pdesc->word[4] |= FIELD_PREP(WORD4_KEY_SIZE, size); } -/* - * Set AES key size. +/** + * set_key_size_aes() - Set AES key size. * - * @pdesc: pointer HW descriptor struct - * @size: key size in bytes (NOT size code) + * @pdesc: Pointer to HW descriptor struct + * @size: Key size in bytes (NOT size code) */ static inline void set_key_size_aes(struct cc_hw_desc *pdesc, u32 size) { set_key_size(pdesc, ((size >> 3) - 2)); } -/* - * Set DES key size. +/** + * set_key_size_des() - Set DES key size. 
* - * @pdesc: pointer HW descriptor struct - * @size: key size in bytes (NOT size code) + * @pdesc: Pointer to HW descriptor struct + * @size: Key size in bytes (NOT size code) */ static inline void set_key_size_des(struct cc_hw_desc *pdesc, u32 size) { set_key_size(pdesc, ((size >> 3) - 1)); } -/* - * Set the descriptor setup mode +/** + * set_setup_mode() - Set the descriptor setup mode * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @mode: Any one of the setup modes defined in [CC7x-DESC] */ static inline void set_setup_mode(struct cc_hw_desc *pdesc, @@ -619,10 +620,10 @@ static inline void set_setup_mode(struct cc_hw_desc *pdesc, pdesc->word[4] |= FIELD_PREP(WORD4_SETUP_OPERATION, mode); } -/* - * Set the descriptor cipher DO +/** + * set_cipher_do() - Set the descriptor cipher DO * - * @pdesc: pointer HW descriptor struct + * @pdesc: Pointer to HW descriptor struct * @config: Any one of the cipher do defined in [CC7x-DESC] */ static inline void set_cipher_do(struct cc_hw_desc *pdesc, From ae02fcfeac13cfceb8c54b826e0b914abb986655 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:19 +0100 Subject: [PATCH 055/162] crypto: ccree - improve kerneldoc in cc_buffer_mgr.c Miscellaneous improvements: - Add missing parameter and return value descriptions. Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_buffer_mgr.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index f2e782d2be15..1ea4812e9354 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -73,9 +73,13 @@ static void cc_copy_mac(struct device *dev, struct aead_request *req, /** * cc_get_sgl_nents() - Get scatterlist number of entries. * + * @dev: Device object * @sg_list: SG list * @nbytes: [IN] Total SGL data bytes. * @lbytes: [OUT] Returns the amount of bytes at the last entry + * + * Return: + * Number of entries in the scatterlist */ static unsigned int cc_get_sgl_nents(struct device *dev, struct scatterlist *sg_list, @@ -102,11 +106,13 @@ static unsigned int cc_get_sgl_nents(struct device *dev, * cc_copy_sg_portion() - Copy scatter list data, * from to_skip to end, to dest and vice versa * - * @dest: - * @sg: - * @to_skip: - * @end: - * @direct: + * @dev: Device object + * @dest: Buffer to copy to/from + * @sg: SG list + * @to_skip: Number of bytes to skip before copying + * @end: Offset of last byte to copy + * @direct: Transfer direction (true == from SG list to buffer, false == from + * buffer to SG list) */ void cc_copy_sg_portion(struct device *dev, u8 *dest, struct scatterlist *sg, u32 to_skip, u32 end, enum cc_sg_cpy_direct direct) From dc16c9f764721019df7a8a9becdab4a48b95cbc3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:20 +0100 Subject: [PATCH 056/162] crypto: ccree - improve kerneldoc in cc_hash.[ch] Miscellaneous improvements: - Start comment blocks with "/**" to enable kerneldoc, - Mark parameters using "@" instead of "\param", - Add missing function names to kerneldoc headers, - Add missing parameter descriptions. 
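For reference, the conversion pattern is the same throughout this group of kerneldoc patches: a "/*!" block with "\param" and "\return" markers is invisible to scripts/kernel-doc, and each one is rewritten as a "/**" block carrying the function name, "@param:" lines and a "Return:" section. A minimal before/after sketch (cc_widget_count() is a made-up name used only to show the shape of the change, not a function in this driver):

/*!
 * Gets the number of widgets
 *
 * \param drvdata
 * \param mode The widget mode
 *
 * \return u32 The number of widgets
 */
u32 cc_widget_count(void *drvdata, u32 mode);

becomes:

/**
 * cc_widget_count() - Get the number of widgets
 * @drvdata: Associated device driver context
 * @mode: The widget mode
 *
 * Return:
 * The number of widgets
 */
u32 cc_widget_count(void *drvdata, u32 mode);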
Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_hash.c | 11 ++++++----- drivers/crypto/ccree/cc_hash.h | 24 ++++++++++++------------ 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index f3adc1ab0e01..738f0debee98 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -2238,14 +2238,15 @@ static const void *cc_larval_digest(struct device *dev, u32 mode) } } -/*! - * Gets the address of the initial digest in SRAM +/** + * cc_larval_digest_addr() - Get the address of the initial digest in SRAM * according to the given hash mode * - * \param drvdata - * \param mode The Hash mode. Supported modes: MD5/SHA1/SHA224/SHA256 + * @drvdata: Associated device driver context + * @mode: The Hash mode. Supported modes: MD5/SHA1/SHA224/SHA256 * - * \return u32 The address of the initial digest in SRAM + * Return: + * The address of the initial digest in SRAM */ u32 cc_larval_digest_addr(void *drvdata, u32 mode) { diff --git a/drivers/crypto/ccree/cc_hash.h b/drivers/crypto/ccree/cc_hash.h index d76ecae996ca..3d0f2179e07e 100644 --- a/drivers/crypto/ccree/cc_hash.h +++ b/drivers/crypto/ccree/cc_hash.h @@ -80,26 +80,26 @@ int cc_hash_alloc(struct cc_drvdata *drvdata); int cc_init_hash_sram(struct cc_drvdata *drvdata); int cc_hash_free(struct cc_drvdata *drvdata); -/*! - * Gets the initial digest length +/** + * cc_digest_len_addr() - Gets the initial digest length * - * \param drvdata - * \param mode The Hash mode. Supported modes: - * MD5/SHA1/SHA224/SHA256/SHA384/SHA512 + * @drvdata: Associated device driver context + * @mode: The Hash mode. Supported modes: MD5/SHA1/SHA224/SHA256/SHA384/SHA512 * - * \return u32 returns the address of the initial digest length in SRAM + * Return: + * Returns the address of the initial digest length in SRAM */ u32 cc_digest_len_addr(void *drvdata, u32 mode); -/*! - * Gets the address of the initial digest in SRAM +/** + * cc_larval_digest_addr() - Gets the address of the initial digest in SRAM * according to the given hash mode * - * \param drvdata - * \param mode The Hash mode. Supported modes: - * MD5/SHA1/SHA224/SHA256/SHA384/SHA512 + * @drvdata: Associated device driver context + * @mode: The Hash mode. Supported modes: MD5/SHA1/SHA224/SHA256/SHA384/SHA512 * - * \return u32 The address of the initial digest in SRAM + * Return: + * The address of the initial digest in SRAM */ u32 cc_larval_digest_addr(void *drvdata, u32 mode); From 5c68361f5746b036d8aa55222f910a5a6fc86e8e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:21 +0100 Subject: [PATCH 057/162] crypto: ccree - improve kerneldoc in cc_request_mgr.[ch] Miscellaneous improvements: - Start comment blocks with "/**" to enable kerneldoc, - Mark parameters using "@" instead of "\param", - Fix copied is_dout parameter of cc_send_request(), - Add missing function names to kerneldoc headers, - Add missing parameter descriptions. 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_request_mgr.c | 41 ++++++++++++++------------- drivers/crypto/ccree/cc_request_mgr.h | 19 ++++++------- 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/drivers/crypto/ccree/cc_request_mgr.c b/drivers/crypto/ccree/cc_request_mgr.c index b2a18122f320..2b8a9f5a5bd3 100644 --- a/drivers/crypto/ccree/cc_request_mgr.c +++ b/drivers/crypto/ccree/cc_request_mgr.c @@ -206,12 +206,13 @@ static void enqueue_seq(struct cc_drvdata *drvdata, struct cc_hw_desc seq[], } } -/*! - * Completion will take place if and only if user requested completion - * by cc_send_sync_request(). +/** + * request_mgr_complete() - Completion will take place if and only if user + * requested completion by cc_send_sync_request(). * - * \param dev - * \param dx_compl_h The completion event to signal + * @dev: Device pointer + * @dx_compl_h: The completion event to signal + * @dummy: unused error code */ static void request_mgr_complete(struct device *dev, void *dx_compl_h, int dummy) @@ -264,15 +265,15 @@ static int cc_queues_status(struct cc_drvdata *drvdata, return -ENOSPC; } -/*! - * Enqueue caller request to crypto hardware. +/** + * cc_do_send_request() - Enqueue caller request to crypto hardware. * Need to be called with HW lock held and PM running * - * \param drvdata - * \param cc_req The request to enqueue - * \param desc The crypto sequence - * \param len The crypto sequence length - * \param add_comp If "true": add an artificial dout DMA to mark completion + * @drvdata: Associated device driver context + * @cc_req: The request to enqueue + * @desc: The crypto sequence + * @len: The crypto sequence length + * @add_comp: If "true": add an artificial dout DMA to mark completion * */ static void cc_do_send_request(struct cc_drvdata *drvdata, @@ -486,16 +487,18 @@ int cc_send_sync_request(struct cc_drvdata *drvdata, return 0; } -/*! - * Enqueue caller request to crypto hardware during init process. - * assume this function is not called in middle of a flow, +/** + * send_request_init() - Enqueue caller request to crypto hardware during init + * process. + * Assume this function is not called in the middle of a flow, * since we set QUEUE_LAST_IND flag in the last descriptor. * - * \param drvdata - * \param desc The crypto sequence - * \param len The crypto sequence length + * @drvdata: Associated device driver context + * @desc: The crypto sequence + * @len: The crypto sequence length * - * \return int Returns "0" upon success + * Return: + * Returns "0" upon success */ int send_request_init(struct cc_drvdata *drvdata, struct cc_hw_desc *desc, unsigned int len) diff --git a/drivers/crypto/ccree/cc_request_mgr.h b/drivers/crypto/ccree/cc_request_mgr.h index ff7746aaaf35..ae25ca843dce 100644 --- a/drivers/crypto/ccree/cc_request_mgr.h +++ b/drivers/crypto/ccree/cc_request_mgr.h @@ -12,18 +12,17 @@ int cc_req_mgr_init(struct cc_drvdata *drvdata); -/*! - * Enqueue caller request to crypto hardware. +/** + * cc_send_request() - Enqueue caller request to crypto hardware. * - * \param drvdata - * \param cc_req The request to enqueue - * \param desc The crypto sequence - * \param len The crypto sequence length - * \param is_dout If "true": completion is handled by the caller - * If "false": this function adds a dummy descriptor completion - * and waits upon completion signal. 
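The "typos in parameter names" are worth fixing because scripts/kernel-doc matches every "@name:" line against the parameter names in the C prototype. A description such as "@nelements:" for a parameter actually called "nelement" therefore produces a pair of W=1 build warnings, roughly of this shape (illustrative file, line number and function name, not real build output from this driver):

foo.c:70: warning: Function parameter or member 'nelement' not described in 'cc_widget_fill'
foo.c:70: warning: Excess function parameter 'nelements' description in 'cc_widget_fill'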
+ * @drvdata: Associated device driver context + * @cc_req: The request to enqueue + * @desc: The crypto sequence + * @len: The crypto sequence length + * @req: Asynchronous crypto request * - * \return int Returns -EINPROGRESS or error + * Return: + * Returns -EINPROGRESS or error */ int cc_send_request(struct cc_drvdata *drvdata, struct cc_crypto_req *cc_req, struct cc_hw_desc *desc, unsigned int len, From 31568ab8323afe78b18f74e676e52dd1b1584461 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:22 +0100 Subject: [PATCH 058/162] crypto: ccree - improve kerneldoc in cc_sram_mgr.[ch] Miscellaneous improvements: - Start comment blocks with "/**" to enable kerneldoc, - Mark parameters using "@" instead of "\param", - Fix typos in parameter names, - Add missing function names to kerneldoc headers, - Add missing parameter and return value descriptions. Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_sram_mgr.c | 16 +++++++++++----- drivers/crypto/ccree/cc_sram_mgr.h | 22 +++++++++++++--------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/ccree/cc_sram_mgr.c b/drivers/crypto/ccree/cc_sram_mgr.c index 38f36cbc05b3..37a95856361f 100644 --- a/drivers/crypto/ccree/cc_sram_mgr.c +++ b/drivers/crypto/ccree/cc_sram_mgr.c @@ -10,6 +10,9 @@ * Returns zero for success, negative value otherwise. * * @drvdata: Associated device driver context + * + * Return: + * 0 for success, negative error code for failure. */ int cc_sram_mgr_init(struct cc_drvdata *drvdata) { @@ -29,11 +32,14 @@ int cc_sram_mgr_init(struct cc_drvdata *drvdata) return 0; } -/*! - * Allocated buffer from SRAM pool. +/** + * cc_sram_alloc() - Allocate buffer from SRAM pool. * - * \param drvdata - * \param size The requested bytes to allocate + * @drvdata: Associated device driver context + * @size: The requested numer of bytes to allocate + * + * Return: + * Address offset in SRAM or NULL_SRAM_ADDR for failure. */ u32 cc_sram_alloc(struct cc_drvdata *drvdata, u32 size) { @@ -64,7 +70,7 @@ u32 cc_sram_alloc(struct cc_drvdata *drvdata, u32 size) * * @src: A pointer to array of words to set as consts. * @dst: The target SRAM buffer to set into - * @nelements: The number of words in "src" array + * @nelement: The number of words in "src" array * @seq: A pointer to the given IN/OUT descriptor sequence * @seq_len: A pointer to the given IN/OUT sequence length */ diff --git a/drivers/crypto/ccree/cc_sram_mgr.h b/drivers/crypto/ccree/cc_sram_mgr.h index 04a857259d41..1c965ef83002 100644 --- a/drivers/crypto/ccree/cc_sram_mgr.h +++ b/drivers/crypto/ccree/cc_sram_mgr.h @@ -12,22 +12,26 @@ struct cc_drvdata; #define NULL_SRAM_ADDR ((u32)-1) -/*! - * Initializes SRAM pool. +/** + * cc_sram_mgr_init() - Initializes SRAM pool. * The first X bytes of SRAM are reserved for ROM usage, hence, pool * starts right after X bytes. * - * \param drvdata + * @drvdata: Associated device driver context * - * \return int Zero for success, negative value otherwise. + * Return: + * Zero for success, negative value otherwise. */ int cc_sram_mgr_init(struct cc_drvdata *drvdata); -/*! - * Allocate buffer from SRAM pool. +/** + * cc_sram_alloc() - Allocate buffer from SRAM pool. * - * \param drvdata - * \param size The requested bytes to allocate + * @drvdata: Associated device driver context + * @size: The requested bytes to allocate + * + * Return: + * Address offset in SRAM or NULL_SRAM_ADDR for failure. 
*/ u32 cc_sram_alloc(struct cc_drvdata *drvdata, u32 size); @@ -38,7 +42,7 @@ u32 cc_sram_alloc(struct cc_drvdata *drvdata, u32 size); * * @src: A pointer to array of words to set as consts. * @dst: The target SRAM buffer to set into - * @nelements: The number of words in "src" array + * @nelement: The number of words in "src" array * @seq: A pointer to the given IN/OUT descriptor sequence * @seq_len: A pointer to the given IN/OUT sequence length */ From 3b1cbdac327c6377edee1e2c956e1623e27dd133 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:23 +0100 Subject: [PATCH 059/162] crypto: ccree - spelling s/Crytpcell/Cryptocell/ Fix a typo in a comment. Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c index 02442596310c..bd9db53c68ab 100644 --- a/drivers/crypto/ccree/cc_driver.c +++ b/drivers/crypto/ccree/cc_driver.c @@ -367,7 +367,7 @@ static int init_cc_resources(struct platform_device *plat_dev) goto post_pm_err; } - /* wait for Crytpcell reset completion */ + /* Wait for Cryptocell reset completion */ if (!cc_wait_for_reset_completion(new_drvdata)) { dev_err(dev, "Cryptocell reset not completed"); } From 24b540f649a932a110b988c26165b8216e8d859e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:24 +0100 Subject: [PATCH 060/162] crypto: ccree - grammar s/not room/no room/ Fix grammar in a comment. Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_request_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/ccree/cc_request_mgr.c b/drivers/crypto/ccree/cc_request_mgr.c index 2b8a9f5a5bd3..2671cffb3b58 100644 --- a/drivers/crypto/ccree/cc_request_mgr.c +++ b/drivers/crypto/ccree/cc_request_mgr.c @@ -378,7 +378,7 @@ static void cc_proc_backlog(struct cc_drvdata *drvdata) rc = cc_queues_status(drvdata, mgr, bli->len); if (rc) { /* - * There is still not room in the FIFO for + * There is still no room in the FIFO for * this request. Bail out. We'll return here * on the next completion irq. */ From 343ee6c4405c2d053f093f68835ecff0cc906a04 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:25 +0100 Subject: [PATCH 061/162] crypto: ccree - use existing dev helper in init_cc_resources() Use the existing dev helper variable instead of plat_dev->dev. 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_driver.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c index bd9db53c68ab..2d50991b9a17 100644 --- a/drivers/crypto/ccree/cc_driver.c +++ b/drivers/crypto/ccree/cc_driver.c @@ -331,13 +331,13 @@ static int init_cc_resources(struct platform_device *plat_dev) init_completion(&new_drvdata->hw_queue_avail); - if (!plat_dev->dev.dma_mask) - plat_dev->dev.dma_mask = &plat_dev->dev.coherent_dma_mask; + if (!dev->dma_mask) + dev->dma_mask = &dev->coherent_dma_mask; dma_mask = DMA_BIT_MASK(DMA_BIT_MASK_LEN); while (dma_mask > 0x7fffffffUL) { - if (dma_supported(&plat_dev->dev, dma_mask)) { - rc = dma_set_coherent_mask(&plat_dev->dev, dma_mask); + if (dma_supported(dev, dma_mask)) { + rc = dma_set_coherent_mask(dev, dma_mask); if (!rc) break; } From ff4d719a5bc7e27eac7002ea6eaea05fa2deb87a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:26 +0100 Subject: [PATCH 062/162] crypto: ccree - use devm_k[mz]alloc() for AEAD data As the lifetime of the AEAD data matches the lifetime of the driver, AEAD data can be allocated using the managed allocators. While at it, simplify cc_aead_free() by removing an unneeded cast, and an unneeded check (aead_handle is always valid here). Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_aead.c | 33 ++++++++++++-------------------- drivers/crypto/ccree/cc_cipher.c | 8 ++++---- drivers/crypto/ccree/cc_hash.c | 9 ++++----- 3 files changed, 20 insertions(+), 30 deletions(-) diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index be26aaf75e1f..904aa66752b3 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -2615,7 +2615,7 @@ static struct cc_crypto_alg *cc_create_aead_alg(struct cc_alg_template *tmpl, struct cc_crypto_alg *t_alg; struct aead_alg *alg; - t_alg = kzalloc(sizeof(*t_alg), GFP_KERNEL); + t_alg = devm_kzalloc(dev, sizeof(*t_alg), GFP_KERNEL); if (!t_alg) return ERR_PTR(-ENOMEM); @@ -2645,19 +2645,12 @@ static struct cc_crypto_alg *cc_create_aead_alg(struct cc_alg_template *tmpl, int cc_aead_free(struct cc_drvdata *drvdata) { struct cc_crypto_alg *t_alg, *n; - struct cc_aead_handle *aead_handle = - (struct cc_aead_handle *)drvdata->aead_handle; + struct cc_aead_handle *aead_handle = drvdata->aead_handle; - if (aead_handle) { - /* Remove registered algs */ - list_for_each_entry_safe(t_alg, n, &aead_handle->aead_list, - entry) { - crypto_unregister_aead(&t_alg->aead_alg); - list_del(&t_alg->entry); - kfree(t_alg); - } - kfree(aead_handle); - drvdata->aead_handle = NULL; + /* Remove registered algs */ + list_for_each_entry_safe(t_alg, n, &aead_handle->aead_list, entry) { + crypto_unregister_aead(&t_alg->aead_alg); + list_del(&t_alg->entry); } return 0; @@ -2671,7 +2664,7 @@ int cc_aead_alloc(struct cc_drvdata *drvdata) int alg; struct device *dev = drvdata_to_dev(drvdata); - aead_handle = kmalloc(sizeof(*aead_handle), GFP_KERNEL); + aead_handle = devm_kmalloc(dev, sizeof(*aead_handle), GFP_KERNEL); if (!aead_handle) { rc = -ENOMEM; goto fail0; @@ -2706,18 +2699,16 @@ int cc_aead_alloc(struct cc_drvdata *drvdata) if (rc) { dev_err(dev, "%s alg registration failed\n", t_alg->aead_alg.base.cra_driver_name); - goto fail2; - } else { - list_add_tail(&t_alg->entry, &aead_handle->aead_list); - dev_dbg(dev, "Registered %s\n", - t_alg->aead_alg.base.cra_driver_name); + 
goto fail1; } + + list_add_tail(&t_alg->entry, &aead_handle->aead_list); + dev_dbg(dev, "Registered %s\n", + t_alg->aead_alg.base.cra_driver_name); } return 0; -fail2: - kfree(t_alg); fail1: cc_aead_free(drvdata); fail0: diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c index d219b5b5d7b4..fc6df7183024 100644 --- a/drivers/crypto/ccree/cc_cipher.c +++ b/drivers/crypto/ccree/cc_cipher.c @@ -1705,11 +1705,11 @@ int cc_cipher_alloc(struct cc_drvdata *drvdata) t_alg->skcipher_alg.base.cra_driver_name); kfree(t_alg); goto fail0; - } else { - list_add_tail(&t_alg->entry, &drvdata->alg_list); - dev_dbg(dev, "Registered %s\n", - t_alg->skcipher_alg.base.cra_driver_name); } + + list_add_tail(&t_alg->entry, &drvdata->alg_list); + dev_dbg(dev, "Registered %s\n", + t_alg->skcipher_alg.base.cra_driver_name); } return 0; diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index 738f0debee98..2ca5532c07a2 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -2024,10 +2024,9 @@ int cc_hash_alloc(struct cc_drvdata *drvdata) driver_hash[alg].driver_name); kfree(t_alg); goto fail; - } else { - list_add_tail(&t_alg->entry, - &hash_handle->hash_list); } + + list_add_tail(&t_alg->entry, &hash_handle->hash_list); } if (hw_mode == DRV_CIPHER_XCBC_MAC || hw_mode == DRV_CIPHER_CMAC) @@ -2049,9 +2048,9 @@ int cc_hash_alloc(struct cc_drvdata *drvdata) driver_hash[alg].driver_name); kfree(t_alg); goto fail; - } else { - list_add_tail(&t_alg->entry, &hash_handle->hash_list); } + + list_add_tail(&t_alg->entry, &hash_handle->hash_list); } return 0; From f7c8f992013da1afad6a353b7c73ea35c014d507 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:27 +0100 Subject: [PATCH 063/162] crypto: ccree - use devm_k[mz]alloc() for cipher data As the lifetime of the cipher data matches the lifetime of the driver, cipher data can be allocated using the managed allocators. Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_cipher.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c index fc6df7183024..d8e6a103a637 100644 --- a/drivers/crypto/ccree/cc_cipher.c +++ b/drivers/crypto/ccree/cc_cipher.c @@ -1626,7 +1626,7 @@ static struct cc_crypto_alg *cc_create_alg(const struct cc_alg_template *tmpl, struct cc_crypto_alg *t_alg; struct skcipher_alg *alg; - t_alg = kzalloc(sizeof(*t_alg), GFP_KERNEL); + t_alg = devm_kzalloc(dev, sizeof(*t_alg), GFP_KERNEL); if (!t_alg) return ERR_PTR(-ENOMEM); @@ -1662,7 +1662,6 @@ int cc_cipher_free(struct cc_drvdata *drvdata) list_for_each_entry_safe(t_alg, n, &drvdata->alg_list, entry) { crypto_unregister_skcipher(&t_alg->skcipher_alg); list_del(&t_alg->entry); - kfree(t_alg); } return 0; } @@ -1703,7 +1702,6 @@ int cc_cipher_alloc(struct cc_drvdata *drvdata) if (rc) { dev_err(dev, "%s alg registration failed\n", t_alg->skcipher_alg.base.cra_driver_name); - kfree(t_alg); goto fail0; } From 91fc6c7b3328c6f72a37e7bc319eb8419f109be3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Feb 2020 19:19:28 +0100 Subject: [PATCH 064/162] crypto: ccree - use devm_kzalloc() for hash data As the lifetime of the hash data matches the lifetime of the driver, hash data can be allocated using the managed allocators. While at it, simplify cc_hash_free() by removing an unneeded check (hash_handle is always valid here). 
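This and the two preceding devm conversions rely on the same managed-allocation idiom: memory from devm_kzalloc() is bound to the struct device, and the driver core frees it when probe fails or when the driver unbinds, so the explicit kfree() calls can be dropped from both the error paths and the _free() routines. A condensed, self-contained sketch of the idiom (foo_probe() and struct foo_priv are placeholders, not code from this driver):

#include <linux/device.h>
#include <linux/platform_device.h>

struct foo_priv {
	int widget_count;
};

static int foo_probe(struct platform_device *pdev)
{
	struct foo_priv *priv;

	/* Bound to pdev->dev: the core releases this automatically if
	 * probe fails later on, or when the driver is unbound. */
	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	platform_set_drvdata(pdev, priv);
	return 0;	/* no kfree() needed anywhere, including remove() */
}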
Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_hash.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index 2ca5532c07a2..0c32aa2e6801 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -1828,7 +1828,7 @@ static struct cc_hash_alg *cc_alloc_hash_alg(struct cc_hash_template *template, struct crypto_alg *alg; struct ahash_alg *halg; - t_crypto_alg = kzalloc(sizeof(*t_crypto_alg), GFP_KERNEL); + t_crypto_alg = devm_kzalloc(dev, sizeof(*t_crypto_alg), GFP_KERNEL); if (!t_crypto_alg) return ERR_PTR(-ENOMEM); @@ -1961,7 +1961,7 @@ int cc_hash_alloc(struct cc_drvdata *drvdata) int rc = 0; int alg; - hash_handle = kzalloc(sizeof(*hash_handle), GFP_KERNEL); + hash_handle = devm_kzalloc(dev, sizeof(*hash_handle), GFP_KERNEL); if (!hash_handle) return -ENOMEM; @@ -2022,7 +2022,6 @@ int cc_hash_alloc(struct cc_drvdata *drvdata) if (rc) { dev_err(dev, "%s alg registration failed\n", driver_hash[alg].driver_name); - kfree(t_alg); goto fail; } @@ -2046,7 +2045,6 @@ int cc_hash_alloc(struct cc_drvdata *drvdata) if (rc) { dev_err(dev, "%s alg registration failed\n", driver_hash[alg].driver_name); - kfree(t_alg); goto fail; } @@ -2056,8 +2054,7 @@ int cc_hash_alloc(struct cc_drvdata *drvdata) return 0; fail: - kfree(drvdata->hash_handle); - drvdata->hash_handle = NULL; + cc_hash_free(drvdata); return rc; } @@ -2066,17 +2063,12 @@ int cc_hash_free(struct cc_drvdata *drvdata) struct cc_hash_alg *t_hash_alg, *hash_n; struct cc_hash_handle *hash_handle = drvdata->hash_handle; - if (hash_handle) { - list_for_each_entry_safe(t_hash_alg, hash_n, - &hash_handle->hash_list, entry) { - crypto_unregister_ahash(&t_hash_alg->ahash_alg); - list_del(&t_hash_alg->entry); - kfree(t_hash_alg); - } - - kfree(hash_handle); - drvdata->hash_handle = NULL; + list_for_each_entry_safe(t_hash_alg, hash_n, &hash_handle->hash_list, + entry) { + crypto_unregister_ahash(&t_hash_alg->ahash_alg); + list_del(&t_hash_alg->entry); } + return 0; } From e68303c6fe8e4d07785c111874665083bf05e420 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 12 Feb 2020 09:45:36 +0100 Subject: [PATCH 065/162] crypto: qat - spelling s/Decrytp/Decrypt/ Fix a typo in a comment. Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_algs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 833bb1d3a11b..e14d3dd291f0 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -97,7 +97,7 @@ struct qat_alg_cd { struct icp_qat_hw_cipher_algo_blk cipher; struct icp_qat_hw_auth_algo_blk hash; } qat_enc_cd; - struct qat_dec { /* Decrytp content desc */ + struct qat_dec { /* Decrypt content desc */ struct icp_qat_hw_auth_algo_blk hash; struct icp_qat_hw_cipher_algo_blk cipher; } qat_dec_cd; From b7f17fe281442de420cbd9b4a6ad34b1d498aec6 Mon Sep 17 00:00:00 2001 From: Iuliana Prodan Date: Wed, 12 Feb 2020 19:55:16 +0200 Subject: [PATCH 066/162] crypto: caam - refactor skcipher/aead/gcm/chachapoly {en,de}crypt functions Create a common crypt function for each skcipher/aead/gcm/chachapoly algorithms and call it for encrypt/decrypt with the specific boolean - true for encrypt and false for decrypt. 
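Distilled to its skeleton, the transform is the usual "parameterize the differing boolean": each encrypt/decrypt pair is identical except for the flag threaded into the job-descriptor setup, so the pair collapses into one common function plus two one-line wrappers. A stand-alone miniature of the shape (all names here are placeholders; the real helpers are in the diff below):

#include <stdbool.h>	/* linux/types.h in kernel code */

struct foo_request {
	unsigned int cryptlen;
};

static void init_foo_job(struct foo_request *req, bool encrypt)
{
	/* program the direction bit into the descriptor here */
}

static int enqueue_foo_job(struct foo_request *req)
{
	return 0;	/* stand-in for the hardware submission */
}

static int foo_crypt(struct foo_request *req, bool encrypt)
{
	/* allocation and setup shared by both directions ... */
	init_foo_job(req, encrypt);	/* the only point of divergence */
	return enqueue_foo_job(req);
}

static int foo_encrypt(struct foo_request *req)
{
	return foo_crypt(req, true);
}

static int foo_decrypt(struct foo_request *req)
{
	return foo_crypt(req, false);
}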
Signed-off-by: Iuliana Prodan Reviewed-by: Horia Geanta Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 320 +++++++++------------------------- 1 file changed, 79 insertions(+), 241 deletions(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index ef1a65f4fc92..30fca375b9b4 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -941,8 +941,8 @@ static void skcipher_unmap(struct device *dev, struct skcipher_edesc *edesc, edesc->sec4_sg_dma, edesc->sec4_sg_bytes); } -static void aead_encrypt_done(struct device *jrdev, u32 *desc, u32 err, - void *context) +static void aead_crypt_done(struct device *jrdev, u32 *desc, u32 err, + void *context) { struct aead_request *req = context; struct aead_edesc *edesc; @@ -962,69 +962,8 @@ static void aead_encrypt_done(struct device *jrdev, u32 *desc, u32 err, aead_request_complete(req, ecode); } -static void aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err, - void *context) -{ - struct aead_request *req = context; - struct aead_edesc *edesc; - int ecode = 0; - - dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - - edesc = container_of(desc, struct aead_edesc, hw_desc[0]); - - if (err) - ecode = caam_jr_strstatus(jrdev, err); - - aead_unmap(jrdev, edesc, req); - - kfree(edesc); - - aead_request_complete(req, ecode); -} - -static void skcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err, - void *context) -{ - struct skcipher_request *req = context; - struct skcipher_edesc *edesc; - struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - int ivsize = crypto_skcipher_ivsize(skcipher); - int ecode = 0; - - dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - - edesc = container_of(desc, struct skcipher_edesc, hw_desc[0]); - - if (err) - ecode = caam_jr_strstatus(jrdev, err); - - skcipher_unmap(jrdev, edesc, req); - - /* - * The crypto API expects us to set the IV (req->iv) to the last - * ciphertext block (CBC mode) or last counter (CTR mode). - * This is used e.g. by the CTS mode. - */ - if (ivsize && !ecode) { - memcpy(req->iv, (u8 *)edesc->sec4_sg + edesc->sec4_sg_bytes, - ivsize); - print_hex_dump_debug("dstiv @"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->iv, - edesc->src_nents > 1 ? 100 : ivsize, 1); - } - - caam_dump_sg("dst @" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->dst, - edesc->dst_nents > 1 ? 
100 : req->cryptlen, 1); - - kfree(edesc); - - skcipher_request_complete(req, ecode); -} - -static void skcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err, - void *context) +static void skcipher_crypt_done(struct device *jrdev, u32 *desc, u32 err, + void *context) { struct skcipher_request *req = context; struct skcipher_edesc *edesc; @@ -1436,7 +1375,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, return edesc; } -static int gcm_encrypt(struct aead_request *req) +static inline int chachapoly_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); @@ -1444,22 +1383,21 @@ static int gcm_encrypt(struct aead_request *req) struct device *jrdev = ctx->jrdev; bool all_contig; u32 *desc; - int ret = 0; + int ret; - /* allocate extended descriptor */ - edesc = aead_edesc_alloc(req, GCM_DESC_JOB_IO_LEN, &all_contig, true); + edesc = aead_edesc_alloc(req, CHACHAPOLY_DESC_JOB_IO_LEN, &all_contig, + encrypt); if (IS_ERR(edesc)) return PTR_ERR(edesc); - /* Create and submit job descriptor */ - init_gcm_job(req, edesc, all_contig, true); - - print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, - desc_bytes(edesc->hw_desc), 1); - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, aead_encrypt_done, req); + + init_chachapoly_job(req, edesc, all_contig, encrypt); + print_hex_dump_debug("chachapoly jobdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), + 1); + + ret = caam_jr_enqueue(jrdev, desc, aead_crypt_done, req); if (!ret) { ret = -EINPROGRESS; } else { @@ -1471,6 +1409,16 @@ static int gcm_encrypt(struct aead_request *req) } static int chachapoly_encrypt(struct aead_request *req) +{ + return chachapoly_crypt(req, true); +} + +static int chachapoly_decrypt(struct aead_request *req) +{ + return chachapoly_crypt(req, false); +} + +static inline int aead_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); @@ -1478,21 +1426,23 @@ static int chachapoly_encrypt(struct aead_request *req) struct device *jrdev = ctx->jrdev; bool all_contig; u32 *desc; - int ret; + int ret = 0; - edesc = aead_edesc_alloc(req, CHACHAPOLY_DESC_JOB_IO_LEN, &all_contig, - true); + /* allocate extended descriptor */ + edesc = aead_edesc_alloc(req, AUTHENC_DESC_JOB_IO_LEN, + &all_contig, encrypt); if (IS_ERR(edesc)) return PTR_ERR(edesc); + /* Create and submit job descriptor */ + init_authenc_job(req, edesc, all_contig, encrypt); + + print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, + desc_bytes(edesc->hw_desc), 1); + desc = edesc->hw_desc; - - init_chachapoly_job(req, edesc, all_contig, true); - print_hex_dump_debug("chachapoly jobdesc@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), - 1); - - ret = caam_jr_enqueue(jrdev, desc, aead_encrypt_done, req); + ret = caam_jr_enqueue(jrdev, desc, aead_crypt_done, req); if (!ret) { ret = -EINPROGRESS; } else { @@ -1503,7 +1453,17 @@ static int chachapoly_encrypt(struct aead_request *req) return ret; } -static int chachapoly_decrypt(struct aead_request *req) +static int aead_encrypt(struct aead_request *req) +{ + return aead_crypt(req, true); +} + +static int aead_decrypt(struct aead_request *req) +{ + return aead_crypt(req, false); +} + +static inline int gcm_crypt(struct aead_request *req, bool encrypt) { struct 
aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); @@ -1511,21 +1471,23 @@ static int chachapoly_decrypt(struct aead_request *req) struct device *jrdev = ctx->jrdev; bool all_contig; u32 *desc; - int ret; + int ret = 0; - edesc = aead_edesc_alloc(req, CHACHAPOLY_DESC_JOB_IO_LEN, &all_contig, - false); + /* allocate extended descriptor */ + edesc = aead_edesc_alloc(req, GCM_DESC_JOB_IO_LEN, &all_contig, + encrypt); if (IS_ERR(edesc)) return PTR_ERR(edesc); + /* Create and submit job descriptor */ + init_gcm_job(req, edesc, all_contig, encrypt); + + print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, + desc_bytes(edesc->hw_desc), 1); + desc = edesc->hw_desc; - - init_chachapoly_job(req, edesc, all_contig, false); - print_hex_dump_debug("chachapoly jobdesc@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), - 1); - - ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req); + ret = caam_jr_enqueue(jrdev, desc, aead_crypt_done, req); if (!ret) { ret = -EINPROGRESS; } else { @@ -1536,124 +1498,26 @@ static int chachapoly_decrypt(struct aead_request *req) return ret; } +static int gcm_encrypt(struct aead_request *req) +{ + return gcm_crypt(req, true); +} + +static int gcm_decrypt(struct aead_request *req) +{ + return gcm_crypt(req, false); +} + static int ipsec_gcm_encrypt(struct aead_request *req) { return crypto_ipsec_check_assoclen(req->assoclen) ? : gcm_encrypt(req); } -static int aead_encrypt(struct aead_request *req) -{ - struct aead_edesc *edesc; - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); - struct device *jrdev = ctx->jrdev; - bool all_contig; - u32 *desc; - int ret = 0; - - /* allocate extended descriptor */ - edesc = aead_edesc_alloc(req, AUTHENC_DESC_JOB_IO_LEN, - &all_contig, true); - if (IS_ERR(edesc)) - return PTR_ERR(edesc); - - /* Create and submit job descriptor */ - init_authenc_job(req, edesc, all_contig, true); - - print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, - desc_bytes(edesc->hw_desc), 1); - - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, aead_encrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - aead_unmap(jrdev, edesc, req); - kfree(edesc); - } - - return ret; -} - -static int gcm_decrypt(struct aead_request *req) -{ - struct aead_edesc *edesc; - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); - struct device *jrdev = ctx->jrdev; - bool all_contig; - u32 *desc; - int ret = 0; - - /* allocate extended descriptor */ - edesc = aead_edesc_alloc(req, GCM_DESC_JOB_IO_LEN, &all_contig, false); - if (IS_ERR(edesc)) - return PTR_ERR(edesc); - - /* Create and submit job descriptor*/ - init_gcm_job(req, edesc, all_contig, false); - - print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, - desc_bytes(edesc->hw_desc), 1); - - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - aead_unmap(jrdev, edesc, req); - kfree(edesc); - } - - return ret; -} - static int ipsec_gcm_decrypt(struct aead_request *req) { return crypto_ipsec_check_assoclen(req->assoclen) ? 
: gcm_decrypt(req); } -static int aead_decrypt(struct aead_request *req) -{ - struct aead_edesc *edesc; - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); - struct device *jrdev = ctx->jrdev; - bool all_contig; - u32 *desc; - int ret = 0; - - caam_dump_sg("dec src@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->src, - req->assoclen + req->cryptlen, 1); - - /* allocate extended descriptor */ - edesc = aead_edesc_alloc(req, AUTHENC_DESC_JOB_IO_LEN, - &all_contig, false); - if (IS_ERR(edesc)) - return PTR_ERR(edesc); - - /* Create and submit job descriptor*/ - init_authenc_job(req, edesc, all_contig, false); - - print_hex_dump_debug("aead jobdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, - desc_bytes(edesc->hw_desc), 1); - - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - aead_unmap(jrdev, edesc, req); - kfree(edesc); - } - - return ret; -} - /* * allocate and map the skcipher extended descriptor for skcipher */ @@ -1815,7 +1679,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, return edesc; } -static int skcipher_encrypt(struct skcipher_request *req) +static inline int skcipher_crypt(struct skcipher_request *req, bool encrypt) { struct skcipher_edesc *edesc; struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); @@ -1833,14 +1697,14 @@ static int skcipher_encrypt(struct skcipher_request *req) return PTR_ERR(edesc); /* Create and submit job descriptor*/ - init_skcipher_job(req, edesc, true); + init_skcipher_job(req, edesc, encrypt); print_hex_dump_debug("skcipher jobdesc@" __stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, desc_bytes(edesc->hw_desc), 1); desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, skcipher_encrypt_done, req); + ret = caam_jr_enqueue(jrdev, desc, skcipher_crypt_done, req); if (!ret) { ret = -EINPROGRESS; @@ -1852,40 +1716,14 @@ static int skcipher_encrypt(struct skcipher_request *req) return ret; } +static int skcipher_encrypt(struct skcipher_request *req) +{ + return skcipher_crypt(req, true); +} + static int skcipher_decrypt(struct skcipher_request *req) { - struct skcipher_edesc *edesc; - struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); - struct device *jrdev = ctx->jrdev; - u32 *desc; - int ret = 0; - - if (!req->cryptlen) - return 0; - - /* allocate extended descriptor */ - edesc = skcipher_edesc_alloc(req, DESC_JOB_IO_LEN * CAAM_CMD_SZ); - if (IS_ERR(edesc)) - return PTR_ERR(edesc); - - /* Create and submit job descriptor*/ - init_skcipher_job(req, edesc, false); - desc = edesc->hw_desc; - - print_hex_dump_debug("skcipher jobdesc@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, - desc_bytes(edesc->hw_desc), 1); - - ret = caam_jr_enqueue(jrdev, desc, skcipher_decrypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { - skcipher_unmap(jrdev, edesc, req); - kfree(edesc); - } - - return ret; + return skcipher_crypt(req, false); } static struct caam_skcipher_alg driver_algs[] = { From c3f7394eb9546cad7e2ff28aeccdd4655d15fd17 Mon Sep 17 00:00:00 2001 From: Iuliana Prodan Date: Wed, 12 Feb 2020 19:55:17 +0200 Subject: [PATCH 067/162] crypto: caam - refactor ahash_done callbacks Create two common ahash_done_* functions with the dma direction as parameter. 
Then, these 2 are called with the proper direction for unmap. Signed-off-by: Iuliana Prodan Reviewed-by: Horia Geanta Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamhash.c | 88 +++++++++------------------------- 1 file changed, 22 insertions(+), 66 deletions(-) diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 8d9143407fc5..59881a8435b0 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -565,8 +565,8 @@ static inline void ahash_unmap_ctx(struct device *dev, ahash_unmap(dev, edesc, req, dst_len); } -static void ahash_done(struct device *jrdev, u32 *desc, u32 err, - void *context) +static inline void ahash_done_cpy(struct device *jrdev, u32 *desc, u32 err, + void *context, enum dma_data_direction dir) { struct ahash_request *req = context; struct ahash_edesc *edesc; @@ -582,7 +582,7 @@ static void ahash_done(struct device *jrdev, u32 *desc, u32 err, if (err) ecode = caam_jr_strstatus(jrdev, err); - ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE); + ahash_unmap_ctx(jrdev, edesc, req, digestsize, dir); memcpy(req->result, state->caam_ctx, digestsize); kfree(edesc); @@ -593,76 +593,20 @@ static void ahash_done(struct device *jrdev, u32 *desc, u32 err, req->base.complete(&req->base, ecode); } -static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err, - void *context) +static void ahash_done(struct device *jrdev, u32 *desc, u32 err, + void *context) { - struct ahash_request *req = context; - struct ahash_edesc *edesc; - struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); - int digestsize = crypto_ahash_digestsize(ahash); - int ecode = 0; - - dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - - edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); - if (err) - ecode = caam_jr_strstatus(jrdev, err); - - ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL); - kfree(edesc); - - scatterwalk_map_and_copy(state->buf, req->src, - req->nbytes - state->next_buflen, - state->next_buflen, 0); - state->buflen = state->next_buflen; - - print_hex_dump_debug("buf@" __stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, state->buf, - state->buflen, 1); - - print_hex_dump_debug("ctx@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, - ctx->ctx_len, 1); - if (req->result) - print_hex_dump_debug("result@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, req->result, - digestsize, 1); - - req->base.complete(&req->base, ecode); + ahash_done_cpy(jrdev, desc, err, context, DMA_FROM_DEVICE); } static void ahash_done_ctx_src(struct device *jrdev, u32 *desc, u32 err, void *context) { - struct ahash_request *req = context; - struct ahash_edesc *edesc; - struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - int digestsize = crypto_ahash_digestsize(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - int ecode = 0; - - dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - - edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); - if (err) - ecode = caam_jr_strstatus(jrdev, err); - - ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_BIDIRECTIONAL); - memcpy(req->result, state->caam_ctx, digestsize); - kfree(edesc); - - print_hex_dump_debug("ctx@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, - ctx->ctx_len, 1); - - 
req->base.complete(&req->base, ecode); + ahash_done_cpy(jrdev, desc, err, context, DMA_BIDIRECTIONAL); } -static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err, - void *context) +static inline void ahash_done_switch(struct device *jrdev, u32 *desc, u32 err, + void *context, enum dma_data_direction dir) { struct ahash_request *req = context; struct ahash_edesc *edesc; @@ -678,7 +622,7 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err, if (err) ecode = caam_jr_strstatus(jrdev, err); - ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_FROM_DEVICE); + ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, dir); kfree(edesc); scatterwalk_map_and_copy(state->buf, req->src, @@ -701,6 +645,18 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err, req->base.complete(&req->base, ecode); } +static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err, + void *context) +{ + ahash_done_switch(jrdev, desc, err, context, DMA_BIDIRECTIONAL); +} + +static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err, + void *context) +{ + ahash_done_switch(jrdev, desc, err, context, DMA_FROM_DEVICE); +} + /* * Allocate an enhanced descriptor, which contains the hardware descriptor * and space for hardware scatter table containing sg_num entries. From 2ba1e79831aa0388d0d08ad649ecb933f317b61d Mon Sep 17 00:00:00 2001 From: Iuliana Prodan Date: Wed, 12 Feb 2020 19:55:18 +0200 Subject: [PATCH 068/162] crypto: caam - refactor ahash_edesc_alloc Changed parameters for ahash_edesc_alloc function: - remove flags since they can be computed in ahash_edesc_alloc, the only place they are needed; - use ahash_request instead of caam_hash_ctx, to be able to compute gfp flags. Signed-off-by: Iuliana Prodan Reviewed-by: Horia Geanta Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamhash.c | 62 ++++++++++++---------------------- 1 file changed, 22 insertions(+), 40 deletions(-) diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 59881a8435b0..4db85074da67 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -661,11 +661,14 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err, * Allocate an enhanced descriptor, which contains the hardware descriptor * and space for hardware scatter table containing sg_num entries. */ -static struct ahash_edesc *ahash_edesc_alloc(struct caam_hash_ctx *ctx, +static struct ahash_edesc *ahash_edesc_alloc(struct ahash_request *req, int sg_num, u32 *sh_desc, - dma_addr_t sh_desc_dma, - gfp_t flags) + dma_addr_t sh_desc_dma) { + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC; struct ahash_edesc *edesc; unsigned int sg_size = sg_num * sizeof(struct sec4_sg_entry); @@ -724,8 +727,6 @@ static int ahash_update_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
- GFP_KERNEL : GFP_ATOMIC; u8 *buf = state->buf; int *buflen = &state->buflen; int *next_buflen = &state->next_buflen; @@ -779,8 +780,8 @@ static int ahash_update_ctx(struct ahash_request *req) * allocate space for base edesc and hw desc commands, * link tables */ - edesc = ahash_edesc_alloc(ctx, pad_nents, ctx->sh_desc_update, - ctx->sh_desc_update_dma, flags); + edesc = ahash_edesc_alloc(req, pad_nents, ctx->sh_desc_update, + ctx->sh_desc_update_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -854,8 +855,6 @@ static int ahash_final_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; int buflen = state->buflen; u32 *desc; int sec4_sg_bytes; @@ -867,8 +866,8 @@ static int ahash_final_ctx(struct ahash_request *req) sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ - edesc = ahash_edesc_alloc(ctx, 4, ctx->sh_desc_fin, - ctx->sh_desc_fin_dma, flags); + edesc = ahash_edesc_alloc(req, 4, ctx->sh_desc_fin, + ctx->sh_desc_fin_dma); if (!edesc) return -ENOMEM; @@ -920,8 +919,6 @@ static int ahash_finup_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; int buflen = state->buflen; u32 *desc; int sec4_sg_src_index; @@ -950,9 +947,8 @@ static int ahash_finup_ctx(struct ahash_request *req) sec4_sg_src_index = 1 + (buflen ? 1 : 0); /* allocate space for base edesc and hw desc commands, link tables */ - edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents, - ctx->sh_desc_fin, ctx->sh_desc_fin_dma, - flags); + edesc = ahash_edesc_alloc(req, sec4_sg_src_index + mapped_nents, + ctx->sh_desc_fin, ctx->sh_desc_fin_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -1000,8 +996,6 @@ static int ahash_digest(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; u32 *desc; int digestsize = crypto_ahash_digestsize(ahash); int src_nents, mapped_nents; @@ -1028,9 +1022,8 @@ static int ahash_digest(struct ahash_request *req) } /* allocate space for base edesc and hw desc commands, link tables */ - edesc = ahash_edesc_alloc(ctx, mapped_nents > 1 ? mapped_nents : 0, - ctx->sh_desc_digest, ctx->sh_desc_digest_dma, - flags); + edesc = ahash_edesc_alloc(req, mapped_nents > 1 ? mapped_nents : 0, + ctx->sh_desc_digest, ctx->sh_desc_digest_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -1077,8 +1070,6 @@ static int ahash_final_no_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
- GFP_KERNEL : GFP_ATOMIC; u8 *buf = state->buf; int buflen = state->buflen; u32 *desc; @@ -1087,8 +1078,8 @@ static int ahash_final_no_ctx(struct ahash_request *req) int ret; /* allocate space for base edesc and hw desc commands, link tables */ - edesc = ahash_edesc_alloc(ctx, 0, ctx->sh_desc_digest, - ctx->sh_desc_digest_dma, flags); + edesc = ahash_edesc_alloc(req, 0, ctx->sh_desc_digest, + ctx->sh_desc_digest_dma); if (!edesc) return -ENOMEM; @@ -1136,8 +1127,6 @@ static int ahash_update_no_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; u8 *buf = state->buf; int *buflen = &state->buflen; int *next_buflen = &state->next_buflen; @@ -1190,10 +1179,9 @@ static int ahash_update_no_ctx(struct ahash_request *req) * allocate space for base edesc and hw desc commands, * link tables */ - edesc = ahash_edesc_alloc(ctx, pad_nents, + edesc = ahash_edesc_alloc(req, pad_nents, ctx->sh_desc_update_first, - ctx->sh_desc_update_first_dma, - flags); + ctx->sh_desc_update_first_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -1261,8 +1249,6 @@ static int ahash_finup_no_ctx(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; int buflen = state->buflen; u32 *desc; int sec4_sg_bytes, sec4_sg_src_index, src_nents, mapped_nents; @@ -1292,9 +1278,8 @@ static int ahash_finup_no_ctx(struct ahash_request *req) sizeof(struct sec4_sg_entry); /* allocate space for base edesc and hw desc commands, link tables */ - edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents, - ctx->sh_desc_digest, ctx->sh_desc_digest_dma, - flags); + edesc = ahash_edesc_alloc(req, sec4_sg_src_index + mapped_nents, + ctx->sh_desc_digest, ctx->sh_desc_digest_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; @@ -1347,8 +1332,6 @@ static int ahash_update_first(struct ahash_request *req) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct caam_hash_state *state = ahash_request_ctx(req); struct device *jrdev = ctx->jrdev; - gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC; u8 *buf = state->buf; int *buflen = &state->buflen; int *next_buflen = &state->next_buflen; @@ -1396,11 +1379,10 @@ static int ahash_update_first(struct ahash_request *req) * allocate space for base edesc and hw desc commands, * link tables */ - edesc = ahash_edesc_alloc(ctx, mapped_nents > 1 ? + edesc = ahash_edesc_alloc(req, mapped_nents > 1 ? mapped_nents : 0, ctx->sh_desc_update_first, - ctx->sh_desc_update_first_dma, - flags); + ctx->sh_desc_update_first_dma); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); return -ENOMEM; From d53e44fe980ba7325ac60f85312c9facf78fbba1 Mon Sep 17 00:00:00 2001 From: Iuliana Prodan Date: Wed, 12 Feb 2020 19:55:19 +0200 Subject: [PATCH 069/162] crypto: caam - refactor RSA private key _done callbacks Create a common rsa_priv_f_done function, which based on private key form calls the specific unmap function. 
Signed-off-by: Iuliana Prodan Reviewed-by: Horia Geanta Signed-off-by: Herbert Xu --- drivers/crypto/caam/caampkc.c | 61 +++++++++++------------------------ 1 file changed, 18 insertions(+), 43 deletions(-) diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 6619c512ef1a..ebf167754e4f 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -132,10 +132,13 @@ static void rsa_pub_done(struct device *dev, u32 *desc, u32 err, void *context) akcipher_request_complete(req, ecode); } -static void rsa_priv_f1_done(struct device *dev, u32 *desc, u32 err, - void *context) +static void rsa_priv_f_done(struct device *dev, u32 *desc, u32 err, + void *context) { struct akcipher_request *req = context; + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; struct rsa_edesc *edesc; int ecode = 0; @@ -144,45 +147,17 @@ static void rsa_priv_f1_done(struct device *dev, u32 *desc, u32 err, edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); - rsa_priv_f1_unmap(dev, edesc, req); - rsa_io_unmap(dev, edesc, req); - kfree(edesc); + switch (key->priv_form) { + case FORM1: + rsa_priv_f1_unmap(dev, edesc, req); + break; + case FORM2: + rsa_priv_f2_unmap(dev, edesc, req); + break; + case FORM3: + rsa_priv_f3_unmap(dev, edesc, req); + } - akcipher_request_complete(req, ecode); -} - -static void rsa_priv_f2_done(struct device *dev, u32 *desc, u32 err, - void *context) -{ - struct akcipher_request *req = context; - struct rsa_edesc *edesc; - int ecode = 0; - - if (err) - ecode = caam_jr_strstatus(dev, err); - - edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); - - rsa_priv_f2_unmap(dev, edesc, req); - rsa_io_unmap(dev, edesc, req); - kfree(edesc); - - akcipher_request_complete(req, ecode); -} - -static void rsa_priv_f3_done(struct device *dev, u32 *desc, u32 err, - void *context) -{ - struct akcipher_request *req = context; - struct rsa_edesc *edesc; - int ecode = 0; - - if (err) - ecode = caam_jr_strstatus(dev, err); - - edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); - - rsa_priv_f3_unmap(dev, edesc, req); rsa_io_unmap(dev, edesc, req); kfree(edesc); @@ -691,7 +666,7 @@ static int caam_rsa_dec_priv_f1(struct akcipher_request *req) /* Initialize Job Descriptor */ init_rsa_priv_f1_desc(edesc->hw_desc, &edesc->pdb.priv_f1); - ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f1_done, req); + ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f_done, req); if (!ret) return -EINPROGRESS; @@ -724,7 +699,7 @@ static int caam_rsa_dec_priv_f2(struct akcipher_request *req) /* Initialize Job Descriptor */ init_rsa_priv_f2_desc(edesc->hw_desc, &edesc->pdb.priv_f2); - ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f2_done, req); + ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f_done, req); if (!ret) return -EINPROGRESS; @@ -757,7 +732,7 @@ static int caam_rsa_dec_priv_f3(struct akcipher_request *req) /* Initialize Job Descriptor */ init_rsa_priv_f3_desc(edesc->hw_desc, &edesc->pdb.priv_f3); - ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f3_done, req); + ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f_done, req); if (!ret) return -EINPROGRESS; From 4d370a1036958d7df9f1492c345b4984a4eba7f6 Mon Sep 17 00:00:00 2001 From: Iuliana Prodan Date: Wed, 12 Feb 2020 19:55:20 +0200 Subject: [PATCH 070/162] crypto: caam - change return code in caam_jr_enqueue function Based on commit 6b80ea389a0b ("crypto: change transient busy 
return code to -ENOSPC"), change the return code of caam_jr_enqueue function to -EINPROGRESS, in case of success, -ENOSPC in case the CAAM is busy (has no space left in job ring queue), -EIO if it cannot map the caller's descriptor. Update, also, the cases for resource-freeing for each algorithm type. This is done for later use, on backlogging support in CAAM. Signed-off-by: Iuliana Prodan Reviewed-by: Horia Geanta Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 16 ++++------------ drivers/crypto/caam/caamhash.c | 34 +++++++++++----------------------- drivers/crypto/caam/caampkc.c | 16 ++++++++-------- drivers/crypto/caam/caamrng.c | 4 ++-- drivers/crypto/caam/jr.c | 8 ++++---- drivers/crypto/caam/key_gen.c | 2 +- 6 files changed, 30 insertions(+), 50 deletions(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 30fca375b9b4..c1dd8859e024 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -1398,9 +1398,7 @@ static inline int chachapoly_crypt(struct aead_request *req, bool encrypt) 1); ret = caam_jr_enqueue(jrdev, desc, aead_crypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { + if (ret != -EINPROGRESS) { aead_unmap(jrdev, edesc, req); kfree(edesc); } @@ -1443,9 +1441,7 @@ static inline int aead_crypt(struct aead_request *req, bool encrypt) desc = edesc->hw_desc; ret = caam_jr_enqueue(jrdev, desc, aead_crypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { + if (ret != -EINPROGRESS) { aead_unmap(jrdev, edesc, req); kfree(edesc); } @@ -1488,9 +1484,7 @@ static inline int gcm_crypt(struct aead_request *req, bool encrypt) desc = edesc->hw_desc; ret = caam_jr_enqueue(jrdev, desc, aead_crypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { + if (ret != -EINPROGRESS) { aead_unmap(jrdev, edesc, req); kfree(edesc); } @@ -1706,9 +1700,7 @@ static inline int skcipher_crypt(struct skcipher_request *req, bool encrypt) desc = edesc->hw_desc; ret = caam_jr_enqueue(jrdev, desc, skcipher_crypt_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { + if (ret != -EINPROGRESS) { skcipher_unmap(jrdev, edesc, req); kfree(edesc); } diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 4db85074da67..2af9e664a51b 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -395,7 +395,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, u32 *keylen, u8 *key, init_completion(&result.completion); ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result); - if (!ret) { + if (ret == -EINPROGRESS) { /* in progress */ wait_for_completion(&result.completion); ret = result.err; @@ -828,10 +828,8 @@ static int ahash_update_ctx(struct ahash_request *req) desc_bytes(desc), 1); ret = caam_jr_enqueue(jrdev, desc, ahash_done_bi, req); - if (ret) + if (ret != -EINPROGRESS) goto unmap_ctx; - - ret = -EINPROGRESS; } else if (*next_buflen) { scatterwalk_map_and_copy(buf + *buflen, req->src, 0, req->nbytes, 0); @@ -903,10 +901,9 @@ static int ahash_final_ctx(struct ahash_request *req) 1); ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req); - if (ret) - goto unmap_ctx; + if (ret == -EINPROGRESS) + return ret; - return -EINPROGRESS; unmap_ctx: ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_BIDIRECTIONAL); kfree(edesc); @@ -980,10 +977,9 @@ static int ahash_finup_ctx(struct ahash_request *req) 1); ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req); - if (ret) - goto unmap_ctx; + if (ret == -EINPROGRESS) + return ret; - return -EINPROGRESS; 
unmap_ctx: ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_BIDIRECTIONAL); kfree(edesc); @@ -1053,9 +1049,7 @@ static int ahash_digest(struct ahash_request *req) 1); ret = caam_jr_enqueue(jrdev, desc, ahash_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { + if (ret != -EINPROGRESS) { ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE); kfree(edesc); } @@ -1105,9 +1099,7 @@ static int ahash_final_no_ctx(struct ahash_request *req) 1); ret = caam_jr_enqueue(jrdev, desc, ahash_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { + if (ret != -EINPROGRESS) { ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE); kfree(edesc); } @@ -1218,10 +1210,9 @@ static int ahash_update_no_ctx(struct ahash_request *req) desc_bytes(desc), 1); ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req); - if (ret) + if (ret != -EINPROGRESS) goto unmap_ctx; - ret = -EINPROGRESS; state->update = ahash_update_ctx; state->finup = ahash_finup_ctx; state->final = ahash_final_ctx; @@ -1310,9 +1301,7 @@ static int ahash_finup_no_ctx(struct ahash_request *req) 1); ret = caam_jr_enqueue(jrdev, desc, ahash_done, req); - if (!ret) { - ret = -EINPROGRESS; - } else { + if (ret != -EINPROGRESS) { ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE); kfree(edesc); } @@ -1406,10 +1395,9 @@ static int ahash_update_first(struct ahash_request *req) desc_bytes(desc), 1); ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req); - if (ret) + if (ret != -EINPROGRESS) goto unmap_ctx; - ret = -EINPROGRESS; state->update = ahash_update_ctx; state->finup = ahash_finup_ctx; state->final = ahash_final_ctx; diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index ebf167754e4f..7f7ea3241fa3 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -634,8 +634,8 @@ static int caam_rsa_enc(struct akcipher_request *req) init_rsa_pub_desc(edesc->hw_desc, &edesc->pdb.pub); ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_pub_done, req); - if (!ret) - return -EINPROGRESS; + if (ret == -EINPROGRESS) + return ret; rsa_pub_unmap(jrdev, edesc, req); @@ -667,8 +667,8 @@ static int caam_rsa_dec_priv_f1(struct akcipher_request *req) init_rsa_priv_f1_desc(edesc->hw_desc, &edesc->pdb.priv_f1); ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f_done, req); - if (!ret) - return -EINPROGRESS; + if (ret == -EINPROGRESS) + return ret; rsa_priv_f1_unmap(jrdev, edesc, req); @@ -700,8 +700,8 @@ static int caam_rsa_dec_priv_f2(struct akcipher_request *req) init_rsa_priv_f2_desc(edesc->hw_desc, &edesc->pdb.priv_f2); ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f_done, req); - if (!ret) - return -EINPROGRESS; + if (ret == -EINPROGRESS) + return ret; rsa_priv_f2_unmap(jrdev, edesc, req); @@ -733,8 +733,8 @@ static int caam_rsa_dec_priv_f3(struct akcipher_request *req) init_rsa_priv_f3_desc(edesc->hw_desc, &edesc->pdb.priv_f3); ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f_done, req); - if (!ret) - return -EINPROGRESS; + if (ret == -EINPROGRESS) + return ret; rsa_priv_f3_unmap(jrdev, edesc, req); diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index e8baacaabe07..34cbb4a56f05 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -133,7 +133,7 @@ static inline int submit_job(struct caam_rng_ctx *ctx, int to_current) dev_dbg(jrdev, "submitting job %d\n", !(to_current ^ ctx->current_buf)); init_completion(&bd->filled); err = caam_jr_enqueue(jrdev, desc, rng_done, ctx); - if (err) + if (err 
!= -EINPROGRESS) complete(&bd->filled); /* don't wait on failed job*/ else atomic_inc(&bd->empty); /* note if pending */ @@ -153,7 +153,7 @@ static int caam_read(struct hwrng *rng, void *data, size_t max, bool wait) if (atomic_read(&bd->empty) == BUF_EMPTY) { err = submit_job(ctx, 1); /* if can't submit job, can't even wait */ - if (err) + if (err != -EINPROGRESS) return 0; } /* no immediate data, so exit if not waiting */ diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index fc97cde27059..df2a0504bcf6 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -324,8 +324,8 @@ void caam_jr_free(struct device *rdev) EXPORT_SYMBOL(caam_jr_free); /** - * caam_jr_enqueue() - Enqueue a job descriptor head. Returns 0 if OK, - * -EBUSY if the queue is full, -EIO if it cannot map the caller's + * caam_jr_enqueue() - Enqueue a job descriptor head. Returns -EINPROGRESS + * if OK, -ENOSPC if the queue is full, -EIO if it cannot map the caller's * descriptor. * @dev: device of the job ring to be used. This device should have * been assigned prior by caam_jr_register(). @@ -377,7 +377,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, CIRC_SPACE(head, tail, JOBR_DEPTH) <= 0) { spin_unlock_bh(&jrp->inplock); dma_unmap_single(dev, desc_dma, desc_size, DMA_TO_DEVICE); - return -EBUSY; + return -ENOSPC; } head_entry = &jrp->entinfo[head]; @@ -414,7 +414,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, spin_unlock_bh(&jrp->inplock); - return 0; + return -EINPROGRESS; } EXPORT_SYMBOL(caam_jr_enqueue); diff --git a/drivers/crypto/caam/key_gen.c b/drivers/crypto/caam/key_gen.c index 5a851ddc48fb..b0e8a4939b4f 100644 --- a/drivers/crypto/caam/key_gen.c +++ b/drivers/crypto/caam/key_gen.c @@ -108,7 +108,7 @@ int gen_split_key(struct device *jrdev, u8 *key_out, init_completion(&result.completion); ret = caam_jr_enqueue(jrdev, desc, split_key_done, &result); - if (!ret) { + if (ret == -EINPROGRESS) { /* in progress */ wait_for_completion(&result.completion); ret = result.err; From ee38767f152a3310aabee7074848911f43bf5d69 Mon Sep 17 00:00:00 2001 From: Iuliana Prodan Date: Wed, 12 Feb 2020 19:55:21 +0200 Subject: [PATCH 071/162] crypto: caam - support crypto_engine framework for SKCIPHER algorithms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Integrate crypto_engine into CAAM, to make use of the engine queue. Add support for SKCIPHER algorithms. This is intended to be used for CAAM backlogging support. Requests with the backlog flag (e.g. from dm-crypt) will be listed into the crypto-engine queue and processed by CAAM when free. This changes the return codes for enqueuing a request: -EINPROGRESS if OK, -EBUSY if the request is backlogged (via crypto-engine), -ENOSPC if the queue is full, -EIO if it cannot map the caller's descriptor. Only the backlogged requests are sent to crypto-engine, since the others can be handled by CAAM when free, especially since the JR has up to 1024 entries (more than the 10 entries of crypto-engine).
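From the caller's side, both new codes are absorbed by the standard asynchronous completion helpers. A minimal synchronous-wait sketch, assuming an skcipher request that is already fully set up (tfm, buffers and IV not shown):

	DECLARE_CRYPTO_WAIT(wait);
	int err;

	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
				      CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);

	/* crypto_wait_req() absorbs both -EINPROGRESS and -EBUSY and
	 * returns the final completion status */
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);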
Signed-off-by: Iuliana Prodan Signed-off-by: Franck LENORMAND Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/Kconfig | 1 + drivers/crypto/caam/caamalg.c | 78 +++++++++++++++++++++++++++++++---- drivers/crypto/caam/intern.h | 2 + drivers/crypto/caam/jr.c | 28 +++++++++++++ 4 files changed, 101 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index fac5b2e26610..64f82263b20e 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -33,6 +33,7 @@ config CRYPTO_DEV_FSL_CAAM_DEBUG menuconfig CRYPTO_DEV_FSL_CAAM_JR tristate "Freescale CAAM Job Ring driver backend" + select CRYPTO_ENGINE default y help Enables the driver module for Job Rings which are part of diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index c1dd8859e024..5aa01e3ae937 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -56,6 +56,7 @@ #include "sg_sw_sec4.h" #include "key_gen.h" #include "caamalg_desc.h" +#include /* * crypto alg @@ -101,6 +102,7 @@ struct caam_skcipher_alg { * per-session context */ struct caam_ctx { + struct crypto_engine_ctx enginectx; u32 sh_desc_enc[DESC_MAX_USED_LEN]; u32 sh_desc_dec[DESC_MAX_USED_LEN]; u8 key[CAAM_MAX_KEY_SIZE]; @@ -114,6 +116,10 @@ struct caam_ctx { unsigned int authsize; }; +struct caam_skcipher_req_ctx { + struct skcipher_edesc *edesc; +}; + static int aead_null_set_sh_desc(struct crypto_aead *aead) { struct caam_ctx *ctx = crypto_aead_ctx(aead); @@ -881,6 +887,7 @@ struct aead_edesc { * @mapped_dst_nents: number of segments in output h/w link table * @iv_dma: dma address of iv for checking continuity and link table * @sec4_sg_bytes: length of dma mapped sec4_sg space + * @bklog: stored to determine if the request needs backlog * @sec4_sg_dma: bus physical mapped address of h/w link table * @sec4_sg: pointer to h/w link table * @hw_desc: the h/w job descriptor followed by any referenced link tables @@ -893,6 +900,7 @@ struct skcipher_edesc { int mapped_dst_nents; dma_addr_t iv_dma; int sec4_sg_bytes; + bool bklog; dma_addr_t sec4_sg_dma; struct sec4_sg_entry *sec4_sg; u32 hw_desc[0]; @@ -967,13 +975,15 @@ static void skcipher_crypt_done(struct device *jrdev, u32 *desc, u32 err, { struct skcipher_request *req = context; struct skcipher_edesc *edesc; + struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req); struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); + struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev); int ivsize = crypto_skcipher_ivsize(skcipher); int ecode = 0; dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - edesc = container_of(desc, struct skcipher_edesc, hw_desc[0]); + edesc = rctx->edesc; if (err) ecode = caam_jr_strstatus(jrdev, err); @@ -999,7 +1009,14 @@ static void skcipher_crypt_done(struct device *jrdev, u32 *desc, u32 err, kfree(edesc); - skcipher_request_complete(req, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. 
+ */ + if (!edesc->bklog) + skcipher_request_complete(req, ecode); + else + crypto_finalize_skcipher_request(jrp->engine, req, ecode); } /* @@ -1520,6 +1537,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, { struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req); struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; @@ -1618,6 +1636,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, edesc->sec4_sg_bytes = sec4_sg_bytes; edesc->sec4_sg = (struct sec4_sg_entry *)((u8 *)edesc->hw_desc + desc_bytes); + rctx->edesc = edesc; /* Make sure IV is located in a DMAable area */ if (ivsize) { @@ -1673,12 +1692,35 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, return edesc; } +static int skcipher_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct skcipher_request *req = skcipher_request_cast(areq); + struct caam_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req); + u32 *desc = rctx->edesc->hw_desc; + int ret; + + rctx->edesc->bklog = true; + + ret = caam_jr_enqueue(ctx->jrdev, desc, skcipher_crypt_done, req); + + if (ret != -EINPROGRESS) { + skcipher_unmap(ctx->jrdev, rctx->edesc, req); + kfree(rctx->edesc); + } else { + ret = 0; + } + + return ret; +} + static inline int skcipher_crypt(struct skcipher_request *req, bool encrypt) { struct skcipher_edesc *edesc; struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); struct device *jrdev = ctx->jrdev; + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); u32 *desc; int ret = 0; @@ -1698,9 +1740,18 @@ static inline int skcipher_crypt(struct skcipher_request *req, bool encrypt) desc_bytes(edesc->hw_desc), 1); desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, skcipher_crypt_done, req); + /* + * Only the backlog request are sent to crypto-engine since the others + * can be handled by CAAM, if free, especially since JR has up to 1024 + * entries (more than the 10 entries from crypto-engine). 
+ */ + if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG) + ret = crypto_transfer_skcipher_request_to_engine(jrpriv->engine, + req); + else + ret = caam_jr_enqueue(jrdev, desc, skcipher_crypt_done, req); - if (ret != -EINPROGRESS) { + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) { skcipher_unmap(jrdev, edesc, req); kfree(edesc); } @@ -3221,6 +3272,8 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam, { dma_addr_t dma_addr; struct caam_drv_private *priv; + const size_t sh_desc_enc_offset = offsetof(struct caam_ctx, + sh_desc_enc); ctx->jrdev = caam_jr_alloc(); if (IS_ERR(ctx->jrdev)) { @@ -3236,7 +3289,8 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam, dma_addr = dma_map_single_attrs(ctx->jrdev, ctx->sh_desc_enc, offsetof(struct caam_ctx, - sh_desc_enc_dma), + sh_desc_enc_dma) - + sh_desc_enc_offset, ctx->dir, DMA_ATTR_SKIP_CPU_SYNC); if (dma_mapping_error(ctx->jrdev, dma_addr)) { dev_err(ctx->jrdev, "unable to map key, shared descriptors\n"); @@ -3246,8 +3300,10 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam, ctx->sh_desc_enc_dma = dma_addr; ctx->sh_desc_dec_dma = dma_addr + offsetof(struct caam_ctx, - sh_desc_dec); - ctx->key_dma = dma_addr + offsetof(struct caam_ctx, key); + sh_desc_dec) - + sh_desc_enc_offset; + ctx->key_dma = dma_addr + offsetof(struct caam_ctx, key) - + sh_desc_enc_offset; /* copy descriptor header template value */ ctx->cdata.algtype = OP_TYPE_CLASS1_ALG | caam->class1_alg_type; @@ -3261,6 +3317,11 @@ static int caam_cra_init(struct crypto_skcipher *tfm) struct skcipher_alg *alg = crypto_skcipher_alg(tfm); struct caam_skcipher_alg *caam_alg = container_of(alg, typeof(*caam_alg), skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx(tfm); + + crypto_skcipher_set_reqsize(tfm, sizeof(struct caam_skcipher_req_ctx)); + + ctx->enginectx.op.do_one_request = skcipher_do_one_req; return caam_init_common(crypto_skcipher_ctx(tfm), &caam_alg->caam, false); @@ -3279,7 +3340,8 @@ static int caam_aead_init(struct crypto_aead *tfm) static void caam_exit_common(struct caam_ctx *ctx) { dma_unmap_single_attrs(ctx->jrdev, ctx->sh_desc_enc_dma, - offsetof(struct caam_ctx, sh_desc_enc_dma), + offsetof(struct caam_ctx, sh_desc_enc_dma) - + offsetof(struct caam_ctx, sh_desc_enc), ctx->dir, DMA_ATTR_SKIP_CPU_SYNC); caam_jr_free(ctx->jrdev); } diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index c7c10c90464b..230ea88184f5 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -11,6 +11,7 @@ #define INTERN_H #include "ctrl.h" +#include /* Currently comes from Kconfig param as a ^2 (driver-required) */ #define JOBR_DEPTH (1 << CONFIG_CRYPTO_DEV_FSL_CAAM_RINGSIZE) @@ -60,6 +61,7 @@ struct caam_drv_private_jr { int out_ring_read_index; /* Output index "tail" */ int tail; /* entinfo (s/w ring) tail index */ void *outring; /* Base of output ring, DMA-safe */ + struct crypto_engine *engine; }; /* diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index df2a0504bcf6..88aff2aefd5d 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -62,6 +62,15 @@ algs_unlock: mutex_unlock(&algs_lock); } +static void caam_jr_crypto_engine_exit(void *data) +{ + struct device *jrdev = data; + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); + + /* Free the resources of crypto-engine */ + crypto_engine_exit(jrpriv->engine); +} + static int caam_reset_hw_jr(struct device *dev) { struct caam_drv_private_jr *jrp = 
dev_get_drvdata(dev); @@ -538,6 +547,25 @@ static int caam_jr_probe(struct platform_device *pdev) return error; } + /* Initialize crypto engine */ + jrpriv->engine = crypto_engine_alloc_init(jrdev, false); + if (!jrpriv->engine) { + dev_err(jrdev, "Could not init crypto-engine\n"); + return -ENOMEM; + } + + error = devm_add_action_or_reset(jrdev, caam_jr_crypto_engine_exit, + jrdev); + if (error) + return error; + + /* Start crypto engine */ + error = crypto_engine_start(jrpriv->engine); + if (error) { + dev_err(jrdev, "Could not start crypto-engine\n"); + return error; + } + /* Identify the interrupt */ jrpriv->irq = irq_of_parse_and_map(nprop, 0); if (!jrpriv->irq) { From 1c24022667130eefe15d49b906373807f181305c Mon Sep 17 00:00:00 2001 From: Iuliana Prodan Date: Wed, 12 Feb 2020 19:55:22 +0200 Subject: [PATCH 072/162] crypto: caam - add crypto_engine support for AEAD algorithms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add crypto_engine support for AEAD algorithms, to make use of the engine queue. Requests with the backlog flag will be listed into the crypto-engine queue and processed by CAAM when free. If only the backlogged requests are sent to crypto-engine, and the non-backloggable ones go directly to CAAM, the latter have a better chance of being executed, since the JR has up to 1024 entries (more than the 10 entries of crypto-engine). Signed-off-by: Iuliana Prodan Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 107 ++++++++++++++++++++++++---------- 1 file changed, 77 insertions(+), 30 deletions(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 5aa01e3ae937..03797f9b1050 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -120,6 +120,10 @@ struct caam_skcipher_req_ctx { struct skcipher_edesc *edesc; }; +struct caam_aead_req_ctx { + struct aead_edesc *edesc; +}; + static int aead_null_set_sh_desc(struct crypto_aead *aead) { struct caam_ctx *ctx = crypto_aead_ctx(aead); @@ -864,6 +868,7 @@ static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key, * @mapped_src_nents: number of segments in input h/w link table * @mapped_dst_nents: number of segments in output h/w link table * @sec4_sg_bytes: length of dma mapped sec4_sg space + * @bklog: stored to determine if the request needs backlog * @sec4_sg_dma: bus physical mapped address of h/w link table * @sec4_sg: pointer to h/w link table * @hw_desc: the h/w job descriptor followed by any referenced link tables @@ -874,6 +879,7 @@ struct aead_edesc { int mapped_src_nents; int mapped_dst_nents; int sec4_sg_bytes; + bool bklog; dma_addr_t sec4_sg_dma; struct sec4_sg_entry *sec4_sg; u32 hw_desc[]; @@ -953,12 +959,14 @@ static void aead_crypt_done(struct device *jrdev, u32 *desc, u32 err, void *context) { struct aead_request *req = context; + struct caam_aead_req_ctx *rctx = aead_request_ctx(req); + struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev); struct aead_edesc *edesc; int ecode = 0; dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - edesc = container_of(desc, struct aead_edesc, hw_desc[0]); + edesc = rctx->edesc; if (err) ecode = caam_jr_strstatus(jrdev, err); @@ -967,7 +975,14 @@ static void aead_crypt_done(struct device *jrdev, u32 *desc, u32 err, kfree(edesc); - aead_request_complete(req, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine.
+ */ + if (!edesc->bklog) + aead_request_complete(req, ecode); + else + crypto_finalize_aead_request(jrp->engine, req, ecode); } static void skcipher_crypt_done(struct device *jrdev, u32 *desc, u32 err, @@ -1262,6 +1277,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, struct crypto_aead *aead = crypto_aead_reqtfm(req); struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; + struct caam_aead_req_ctx *rctx = aead_request_ctx(req); gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0; @@ -1362,6 +1378,9 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, edesc->mapped_dst_nents = mapped_dst_nents; edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) + desc_bytes; + + rctx->edesc = edesc; + *all_contig_ptr = !(mapped_src_nents > 1); sec4_sg_index = 0; @@ -1392,6 +1411,33 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, return edesc; } +static int aead_enqueue_req(struct device *jrdev, struct aead_request *req) +{ + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); + struct caam_aead_req_ctx *rctx = aead_request_ctx(req); + struct aead_edesc *edesc = rctx->edesc; + u32 *desc = edesc->hw_desc; + int ret; + + /* + * Only the backlog request are sent to crypto-engine since the others + * can be handled by CAAM, if free, especially since JR has up to 1024 + * entries (more than the 10 entries from crypto-engine). + */ + if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG) + ret = crypto_transfer_aead_request_to_engine(jrpriv->engine, + req); + else + ret = caam_jr_enqueue(jrdev, desc, aead_crypt_done, req); + + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) { + aead_unmap(jrdev, edesc, req); + kfree(rctx->edesc); + } + + return ret; +} + static inline int chachapoly_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; @@ -1400,7 +1446,6 @@ static inline int chachapoly_crypt(struct aead_request *req, bool encrypt) struct device *jrdev = ctx->jrdev; bool all_contig; u32 *desc; - int ret; edesc = aead_edesc_alloc(req, CHACHAPOLY_DESC_JOB_IO_LEN, &all_contig, encrypt); @@ -1414,13 +1459,7 @@ static inline int chachapoly_crypt(struct aead_request *req, bool encrypt) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, aead_crypt_done, req); - if (ret != -EINPROGRESS) { - aead_unmap(jrdev, edesc, req); - kfree(edesc); - } - - return ret; + return aead_enqueue_req(jrdev, req); } static int chachapoly_encrypt(struct aead_request *req) @@ -1440,8 +1479,6 @@ static inline int aead_crypt(struct aead_request *req, bool encrypt) struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; bool all_contig; - u32 *desc; - int ret = 0; /* allocate extended descriptor */ edesc = aead_edesc_alloc(req, AUTHENC_DESC_JOB_IO_LEN, @@ -1456,14 +1493,7 @@ static inline int aead_crypt(struct aead_request *req, bool encrypt) DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, desc_bytes(edesc->hw_desc), 1); - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, aead_crypt_done, req); - if (ret != -EINPROGRESS) { - aead_unmap(jrdev, edesc, req); - kfree(edesc); - } - - return ret; + return aead_enqueue_req(jrdev, req); } static int aead_encrypt(struct aead_request *req) @@ -1476,6 +1506,28 @@ static int aead_decrypt(struct aead_request *req) return aead_crypt(req, false); } +static int aead_do_one_req(struct crypto_engine *engine, void 
*areq) +{ + struct aead_request *req = aead_request_cast(areq); + struct caam_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); + struct caam_aead_req_ctx *rctx = aead_request_ctx(req); + u32 *desc = rctx->edesc->hw_desc; + int ret; + + rctx->edesc->bklog = true; + + ret = caam_jr_enqueue(ctx->jrdev, desc, aead_crypt_done, req); + + if (ret != -EINPROGRESS) { + aead_unmap(ctx->jrdev, rctx->edesc, req); + kfree(rctx->edesc); + } else { + ret = 0; + } + + return ret; +} + static inline int gcm_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; @@ -1483,8 +1535,6 @@ static inline int gcm_crypt(struct aead_request *req, bool encrypt) struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; bool all_contig; - u32 *desc; - int ret = 0; /* allocate extended descriptor */ edesc = aead_edesc_alloc(req, GCM_DESC_JOB_IO_LEN, &all_contig, @@ -1499,14 +1549,7 @@ static inline int gcm_crypt(struct aead_request *req, bool encrypt) DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, desc_bytes(edesc->hw_desc), 1); - desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, aead_crypt_done, req); - if (ret != -EINPROGRESS) { - aead_unmap(jrdev, edesc, req); - kfree(edesc); - } - - return ret; + return aead_enqueue_req(jrdev, req); } static int gcm_encrypt(struct aead_request *req) @@ -3334,6 +3377,10 @@ static int caam_aead_init(struct crypto_aead *tfm) container_of(alg, struct caam_aead_alg, aead); struct caam_ctx *ctx = crypto_aead_ctx(tfm); + crypto_aead_set_reqsize(tfm, sizeof(struct caam_aead_req_ctx)); + + ctx->enginectx.op.do_one_request = aead_do_one_req; + return caam_init_common(ctx, &caam_alg->caam, !caam_alg->caam.nodkp); } From bf53795025a2b026c89b45be1d832a325a2a9258 Mon Sep 17 00:00:00 2001 From: Iuliana Prodan Date: Wed, 12 Feb 2020 19:55:23 +0200 Subject: [PATCH 073/162] crypto: caam - add crypto_engine support for RSA algorithms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add crypto_engine support for RSA algorithms, to make use of the engine queue. Requests with the backlog flag will be listed into the crypto-engine queue and processed by CAAM when free. In case the queue is empty, the request is sent directly to CAAM. Only the backlogged requests are sent to crypto-engine, since the others can be handled by CAAM when free, especially since the JR has up to 1024 entries (more than the 10 entries of crypto-engine).
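Because crypto-engine invokes the driver's do_one_request callback with nothing but the request, the prepared descriptor can no longer be recovered via container_of() on the descriptor pointer; it is stashed in the per-request context instead. The pattern, condensed from the hunks below (error-path cleanup elided):

	static int akcipher_do_one_req(struct crypto_engine *engine, void *areq)
	{
		struct akcipher_request *req = container_of(areq,
							    struct akcipher_request,
							    base);
		struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
		struct caam_rsa_ctx *ctx =
			akcipher_tfm_ctx(crypto_akcipher_reqtfm(req));
		int ret;

		req_ctx->edesc->bklog = true;	/* complete via the engine */

		ret = caam_jr_enqueue(ctx->dev, req_ctx->edesc->hw_desc,
				      req_ctx->akcipher_op_done, req);

		/* 0 tells crypto-engine the job is now in flight on the ring */
		return ret == -EINPROGRESS ? 0 : ret;
	}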
Signed-off-by: Iuliana Prodan Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caampkc.c | 128 +++++++++++++++++++++++++++------- drivers/crypto/caam/caampkc.h | 10 +++ 2 files changed, 114 insertions(+), 24 deletions(-) diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 7f7ea3241fa3..4fcae37a2e33 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -117,19 +117,28 @@ static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc, static void rsa_pub_done(struct device *dev, u32 *desc, u32 err, void *context) { struct akcipher_request *req = context; + struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); + struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); struct rsa_edesc *edesc; int ecode = 0; if (err) ecode = caam_jr_strstatus(dev, err); - edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); + edesc = req_ctx->edesc; rsa_pub_unmap(dev, edesc, req); rsa_io_unmap(dev, edesc, req); kfree(edesc); - akcipher_request_complete(req, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. + */ + if (!edesc->bklog) + akcipher_request_complete(req, ecode); + else + crypto_finalize_akcipher_request(jrp->engine, req, ecode); } static void rsa_priv_f_done(struct device *dev, u32 *desc, u32 err, @@ -137,15 +146,17 @@ static void rsa_priv_f_done(struct device *dev, u32 *desc, u32 err, { struct akcipher_request *req = context; struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); struct caam_rsa_key *key = &ctx->key; + struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); struct rsa_edesc *edesc; int ecode = 0; if (err) ecode = caam_jr_strstatus(dev, err); - edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); + edesc = req_ctx->edesc; switch (key->priv_form) { case FORM1: @@ -161,7 +172,14 @@ static void rsa_priv_f_done(struct device *dev, u32 *desc, u32 err, rsa_io_unmap(dev, edesc, req); kfree(edesc); - akcipher_request_complete(req, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. 
+ */ + if (!edesc->bklog) + akcipher_request_complete(req, ecode); + else + crypto_finalize_akcipher_request(jrp->engine, req, ecode); } /** @@ -309,6 +327,8 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req, edesc->src_nents = src_nents; edesc->dst_nents = dst_nents; + req_ctx->edesc = edesc; + if (!sec4_sg_bytes) return edesc; @@ -339,6 +359,33 @@ src_fail: return ERR_PTR(-ENOMEM); } +static int akcipher_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct akcipher_request *req = container_of(areq, + struct akcipher_request, + base); + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct device *jrdev = ctx->dev; + u32 *desc = req_ctx->edesc->hw_desc; + int ret; + + req_ctx->edesc->bklog = true; + + ret = caam_jr_enqueue(jrdev, desc, req_ctx->akcipher_op_done, req); + + if (ret != -EINPROGRESS) { + rsa_pub_unmap(jrdev, req_ctx->edesc, req); + rsa_io_unmap(jrdev, req_ctx->edesc, req); + kfree(req_ctx->edesc); + } else { + ret = 0; + } + + return ret; +} + static int set_rsa_pub_pdb(struct akcipher_request *req, struct rsa_edesc *edesc) { @@ -602,6 +649,53 @@ unmap_p: return -ENOMEM; } +static int akcipher_enqueue_req(struct device *jrdev, + void (*cbk)(struct device *jrdev, u32 *desc, + u32 err, void *context), + struct akcipher_request *req) +{ + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); + struct rsa_edesc *edesc = req_ctx->edesc; + u32 *desc = edesc->hw_desc; + int ret; + + req_ctx->akcipher_op_done = cbk; + /* + * Only the backlog request are sent to crypto-engine since the others + * can be handled by CAAM, if free, especially since JR has up to 1024 + * entries (more than the 10 entries from crypto-engine). 
+ */ + if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG) + ret = crypto_transfer_akcipher_request_to_engine(jrpriv->engine, + req); + else + ret = caam_jr_enqueue(jrdev, desc, cbk, req); + + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) { + switch (key->priv_form) { + case FORM1: + rsa_priv_f1_unmap(jrdev, edesc, req); + break; + case FORM2: + rsa_priv_f2_unmap(jrdev, edesc, req); + break; + case FORM3: + rsa_priv_f3_unmap(jrdev, edesc, req); + break; + default: + rsa_pub_unmap(jrdev, edesc, req); + } + rsa_io_unmap(jrdev, edesc, req); + kfree(edesc); + } + + return ret; +} + static int caam_rsa_enc(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); @@ -633,11 +727,7 @@ static int caam_rsa_enc(struct akcipher_request *req) /* Initialize Job Descriptor */ init_rsa_pub_desc(edesc->hw_desc, &edesc->pdb.pub); - ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_pub_done, req); - if (ret == -EINPROGRESS) - return ret; - - rsa_pub_unmap(jrdev, edesc, req); + return akcipher_enqueue_req(jrdev, rsa_pub_done, req); init_fail: rsa_io_unmap(jrdev, edesc, req); @@ -666,11 +756,7 @@ static int caam_rsa_dec_priv_f1(struct akcipher_request *req) /* Initialize Job Descriptor */ init_rsa_priv_f1_desc(edesc->hw_desc, &edesc->pdb.priv_f1); - ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f_done, req); - if (ret == -EINPROGRESS) - return ret; - - rsa_priv_f1_unmap(jrdev, edesc, req); + return akcipher_enqueue_req(jrdev, rsa_priv_f_done, req); init_fail: rsa_io_unmap(jrdev, edesc, req); @@ -699,11 +785,7 @@ static int caam_rsa_dec_priv_f2(struct akcipher_request *req) /* Initialize Job Descriptor */ init_rsa_priv_f2_desc(edesc->hw_desc, &edesc->pdb.priv_f2); - ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f_done, req); - if (ret == -EINPROGRESS) - return ret; - - rsa_priv_f2_unmap(jrdev, edesc, req); + return akcipher_enqueue_req(jrdev, rsa_priv_f_done, req); init_fail: rsa_io_unmap(jrdev, edesc, req); @@ -732,11 +814,7 @@ static int caam_rsa_dec_priv_f3(struct akcipher_request *req) /* Initialize Job Descriptor */ init_rsa_priv_f3_desc(edesc->hw_desc, &edesc->pdb.priv_f3); - ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f_done, req); - if (ret == -EINPROGRESS) - return ret; - - rsa_priv_f3_unmap(jrdev, edesc, req); + return akcipher_enqueue_req(jrdev, rsa_priv_f_done, req); init_fail: rsa_io_unmap(jrdev, edesc, req); @@ -1029,6 +1107,8 @@ static int caam_rsa_init_tfm(struct crypto_akcipher *tfm) return -ENOMEM; } + ctx->enginectx.op.do_one_request = akcipher_do_one_req; + return 0; } diff --git a/drivers/crypto/caam/caampkc.h b/drivers/crypto/caam/caampkc.h index c68fb4c03ee6..cc889a525e2f 100644 --- a/drivers/crypto/caam/caampkc.h +++ b/drivers/crypto/caam/caampkc.h @@ -12,6 +12,7 @@ #define _PKC_DESC_H_ #include "compat.h" #include "pdb.h" +#include /** * caam_priv_key_form - CAAM RSA private key representation @@ -87,11 +88,13 @@ struct caam_rsa_key { /** * caam_rsa_ctx - per session context. 
+ * @enginectx : crypto engine context * @key : RSA key in DMA zone * @dev : device structure * @padding_dma : dma address of padding, for adding it to the input */ struct caam_rsa_ctx { + struct crypto_engine_ctx enginectx; struct caam_rsa_key key; struct device *dev; dma_addr_t padding_dma; @@ -103,11 +106,16 @@ struct caam_rsa_ctx { * @src : input scatterlist (stripped of leading zeros) * @fixup_src : input scatterlist (that might be stripped of leading zeros) * @fixup_src_len : length of the fixup_src input scatterlist + * @edesc : s/w-extended rsa descriptor + * @akcipher_op_done : callback used when operation is done */ struct caam_rsa_req_ctx { struct scatterlist src[2]; struct scatterlist *fixup_src; unsigned int fixup_src_len; + struct rsa_edesc *edesc; + void (*akcipher_op_done)(struct device *jrdev, u32 *desc, u32 err, + void *context); }; /** @@ -117,6 +125,7 @@ struct caam_rsa_req_ctx { * @mapped_src_nents: number of segments in input h/w link table * @mapped_dst_nents: number of segments in output h/w link table * @sec4_sg_bytes : length of h/w link table + * @bklog : stored to determine if the request needs backlog * @sec4_sg_dma : dma address of h/w link table * @sec4_sg : pointer to h/w link table * @pdb : specific RSA Protocol Data Block (PDB) @@ -128,6 +137,7 @@ struct rsa_edesc { int mapped_src_nents; int mapped_dst_nents; int sec4_sg_bytes; + bool bklog; dma_addr_t sec4_sg_dma; struct sec4_sg_entry *sec4_sg; union { From 21b014f038d384ca66bc4644cc10a1f6436787ae Mon Sep 17 00:00:00 2001 From: Iuliana Prodan Date: Wed, 12 Feb 2020 19:55:24 +0200 Subject: [PATCH 074/162] crypto: caam - add crypto_engine support for HASH algorithms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add crypto_engine support for HASH algorithms, to make use of the engine queue. Requests with the backlog flag will be listed into the crypto-engine queue and processed by CAAM when free. Only the backlogged requests are sent to crypto-engine, since the others can be handled by CAAM when free, especially since the JR has up to 1024 entries (more than the 10 entries of crypto-engine).
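The wiring is the same as for the other algorithm classes: crypto-engine, as of this series, finds these callbacks by casting the tfm context to struct crypto_engine_ctx, which is why enginectx has to stay the first member of the session context, and do_one_request is hooked up at init time. A condensed sketch (the full version is in the hunks below):

	struct caam_hash_ctx {
		struct crypto_engine_ctx enginectx;	/* must remain first */
		/* ... shared descriptors, keys, DMA state ... */
	};

	/* in caam_hash_cra_init(): */
	ctx->enginectx.op.do_one_request = ahash_do_one_req;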
Signed-off-by: Iuliana Prodan Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamhash.c | 174 +++++++++++++++++++++++---------- 1 file changed, 123 insertions(+), 51 deletions(-) diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 2af9e664a51b..2fe852853d40 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -65,6 +65,7 @@ #include "sg_sw_sec4.h" #include "key_gen.h" #include "caamhash_desc.h" +#include #define CAAM_CRA_PRIORITY 3000 @@ -86,6 +87,7 @@ static struct list_head hash_list; /* ahash per-session context */ struct caam_hash_ctx { + struct crypto_engine_ctx enginectx; u32 sh_desc_update[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned; u32 sh_desc_update_first[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned; u32 sh_desc_fin[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned; @@ -111,9 +113,12 @@ struct caam_hash_state { int buflen; int next_buflen; u8 caam_ctx[MAX_CTX_LEN] ____cacheline_aligned; - int (*update)(struct ahash_request *req); + int (*update)(struct ahash_request *req) ____cacheline_aligned; int (*final)(struct ahash_request *req); int (*finup)(struct ahash_request *req); + struct ahash_edesc *edesc; + void (*ahash_op_done)(struct device *jrdev, u32 *desc, u32 err, + void *context); }; struct caam_export_state { @@ -521,6 +526,7 @@ static int acmac_setkey(struct crypto_ahash *ahash, const u8 *key, * @sec4_sg_dma: physical mapped address of h/w link table * @src_nents: number of segments in input scatterlist * @sec4_sg_bytes: length of dma mapped sec4_sg space + * @bklog: stored to determine if the request needs backlog * @hw_desc: the h/w job descriptor followed by any referenced link tables * @sec4_sg: h/w link table */ @@ -528,6 +534,7 @@ struct ahash_edesc { dma_addr_t sec4_sg_dma; int src_nents; int sec4_sg_bytes; + bool bklog; u32 hw_desc[DESC_JOB_IO_LEN_MAX / sizeof(u32)] ____cacheline_aligned; struct sec4_sg_entry sec4_sg[0]; }; @@ -569,6 +576,7 @@ static inline void ahash_done_cpy(struct device *jrdev, u32 *desc, u32 err, void *context, enum dma_data_direction dir) { struct ahash_request *req = context; + struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev); struct ahash_edesc *edesc; struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); int digestsize = crypto_ahash_digestsize(ahash); @@ -578,7 +586,8 @@ static inline void ahash_done_cpy(struct device *jrdev, u32 *desc, u32 err, dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); + edesc = state->edesc; + if (err) ecode = caam_jr_strstatus(jrdev, err); @@ -590,7 +599,14 @@ static inline void ahash_done_cpy(struct device *jrdev, u32 *desc, u32 err, DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx, ctx->ctx_len, 1); - req->base.complete(&req->base, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. 
+ */ + if (!edesc->bklog) + req->base.complete(&req->base, ecode); + else + crypto_finalize_hash_request(jrp->engine, req, ecode); } static void ahash_done(struct device *jrdev, u32 *desc, u32 err, @@ -609,6 +625,7 @@ static inline void ahash_done_switch(struct device *jrdev, u32 *desc, u32 err, void *context, enum dma_data_direction dir) { struct ahash_request *req = context; + struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev); struct ahash_edesc *edesc; struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); @@ -618,7 +635,7 @@ static inline void ahash_done_switch(struct device *jrdev, u32 *desc, u32 err, dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); - edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); + edesc = state->edesc; if (err) ecode = caam_jr_strstatus(jrdev, err); @@ -642,7 +659,15 @@ static inline void ahash_done_switch(struct device *jrdev, u32 *desc, u32 err, DUMP_PREFIX_ADDRESS, 16, 4, req->result, digestsize, 1); - req->base.complete(&req->base, ecode); + /* + * If no backlog flag, the completion of the request is done + * by CAAM, not crypto engine. + */ + if (!edesc->bklog) + req->base.complete(&req->base, ecode); + else + crypto_finalize_hash_request(jrp->engine, req, ecode); + } static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err, @@ -667,6 +692,7 @@ static struct ahash_edesc *ahash_edesc_alloc(struct ahash_request *req, { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_state *state = ahash_request_ctx(req); gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; struct ahash_edesc *edesc; @@ -678,6 +704,8 @@ static struct ahash_edesc *ahash_edesc_alloc(struct ahash_request *req, return NULL; } + state->edesc = edesc; + init_job_desc_shared(edesc->hw_desc, sh_desc_dma, desc_len(sh_desc), HDR_SHARE_DEFER | HDR_REVERSE); @@ -720,6 +748,62 @@ static int ahash_edesc_add_src(struct caam_hash_ctx *ctx, return 0; } +static int ahash_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct ahash_request *req = ahash_request_cast(areq); + struct caam_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct caam_hash_state *state = ahash_request_ctx(req); + struct device *jrdev = ctx->jrdev; + u32 *desc = state->edesc->hw_desc; + int ret; + + state->edesc->bklog = true; + + ret = caam_jr_enqueue(jrdev, desc, state->ahash_op_done, req); + + if (ret != -EINPROGRESS) { + ahash_unmap(jrdev, state->edesc, req, 0); + kfree(state->edesc); + } else { + ret = 0; + } + + return ret; +} + +static int ahash_enqueue_req(struct device *jrdev, + void (*cbk)(struct device *jrdev, u32 *desc, + u32 err, void *context), + struct ahash_request *req, + int dst_len, enum dma_data_direction dir) +{ + struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); + struct caam_hash_state *state = ahash_request_ctx(req); + struct ahash_edesc *edesc = state->edesc; + u32 *desc = edesc->hw_desc; + int ret; + + state->ahash_op_done = cbk; + + /* + * Only the backlog request are sent to crypto-engine since the others + * can be handled by CAAM, if free, especially since JR has up to 1024 + * entries (more than the 10 entries from crypto-engine). 
+ */ + if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG) + ret = crypto_transfer_hash_request_to_engine(jrpriv->engine, + req); + else + ret = caam_jr_enqueue(jrdev, desc, cbk, req); + + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) { + ahash_unmap_ctx(jrdev, edesc, req, dst_len, dir); + kfree(edesc); + } + + return ret; +} + /* submit update job descriptor */ static int ahash_update_ctx(struct ahash_request *req) { @@ -827,9 +911,8 @@ static int ahash_update_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done_bi, req); - if (ret != -EINPROGRESS) - goto unmap_ctx; + ret = ahash_enqueue_req(jrdev, ahash_done_bi, req, + ctx->ctx_len, DMA_BIDIRECTIONAL); } else if (*next_buflen) { scatterwalk_map_and_copy(buf + *buflen, req->src, 0, req->nbytes, 0); @@ -900,10 +983,8 @@ static int ahash_final_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req); - if (ret == -EINPROGRESS) - return ret; - + return ahash_enqueue_req(jrdev, ahash_done_ctx_src, req, + digestsize, DMA_BIDIRECTIONAL); unmap_ctx: ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_BIDIRECTIONAL); kfree(edesc); @@ -976,10 +1057,8 @@ static int ahash_finup_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req); - if (ret == -EINPROGRESS) - return ret; - + return ahash_enqueue_req(jrdev, ahash_done_ctx_src, req, + digestsize, DMA_BIDIRECTIONAL); unmap_ctx: ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_BIDIRECTIONAL); kfree(edesc); @@ -1048,13 +1127,8 @@ static int ahash_digest(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done, req); - if (ret != -EINPROGRESS) { - ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE); - kfree(edesc); - } - - return ret; + return ahash_enqueue_req(jrdev, ahash_done, req, digestsize, + DMA_FROM_DEVICE); } /* submit ahash final if it the first job descriptor */ @@ -1098,18 +1172,12 @@ static int ahash_final_no_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done, req); - if (ret != -EINPROGRESS) { - ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE); - kfree(edesc); - } - - return ret; + return ahash_enqueue_req(jrdev, ahash_done, req, + digestsize, DMA_FROM_DEVICE); unmap: ahash_unmap(jrdev, edesc, req, digestsize); kfree(edesc); return -ENOMEM; - } /* submit ahash update if it the first job descriptor after update */ @@ -1209,10 +1277,10 @@ static int ahash_update_no_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req); - if (ret != -EINPROGRESS) - goto unmap_ctx; - + ret = ahash_enqueue_req(jrdev, ahash_done_ctx_dst, req, + ctx->ctx_len, DMA_TO_DEVICE); + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) + return ret; state->update = ahash_update_ctx; state->finup = ahash_finup_ctx; state->final = ahash_final_ctx; @@ -1300,13 +1368,8 @@ static int ahash_finup_no_ctx(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done, req); - if (ret != -EINPROGRESS) { - ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE); - kfree(edesc); - } - - return ret; + return 
ahash_enqueue_req(jrdev, ahash_done, req, + digestsize, DMA_FROM_DEVICE); unmap: ahash_unmap(jrdev, edesc, req, digestsize); kfree(edesc); @@ -1394,10 +1457,10 @@ static int ahash_update_first(struct ahash_request *req) DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); - ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req); - if (ret != -EINPROGRESS) - goto unmap_ctx; - + ret = ahash_enqueue_req(jrdev, ahash_done_ctx_dst, req, + ctx->ctx_len, DMA_TO_DEVICE); + if ((ret != -EINPROGRESS) && (ret != -EBUSY)) + return ret; state->update = ahash_update_ctx; state->finup = ahash_finup_ctx; state->final = ahash_final_ctx; @@ -1700,6 +1763,8 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) HASH_MSG_LEN + SHA256_DIGEST_SIZE, HASH_MSG_LEN + 64, HASH_MSG_LEN + SHA512_DIGEST_SIZE }; + const size_t sh_desc_update_offset = offsetof(struct caam_hash_ctx, + sh_desc_update); dma_addr_t dma_addr; struct caam_drv_private *priv; @@ -1752,7 +1817,8 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) } dma_addr = dma_map_single_attrs(ctx->jrdev, ctx->sh_desc_update, - offsetof(struct caam_hash_ctx, key), + offsetof(struct caam_hash_ctx, key) - + sh_desc_update_offset, ctx->dir, DMA_ATTR_SKIP_CPU_SYNC); if (dma_mapping_error(ctx->jrdev, dma_addr)) { dev_err(ctx->jrdev, "unable to map shared descriptors\n"); @@ -1770,11 +1836,16 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) ctx->sh_desc_update_dma = dma_addr; ctx->sh_desc_update_first_dma = dma_addr + offsetof(struct caam_hash_ctx, - sh_desc_update_first); + sh_desc_update_first) - + sh_desc_update_offset; ctx->sh_desc_fin_dma = dma_addr + offsetof(struct caam_hash_ctx, - sh_desc_fin); + sh_desc_fin) - + sh_desc_update_offset; ctx->sh_desc_digest_dma = dma_addr + offsetof(struct caam_hash_ctx, - sh_desc_digest); + sh_desc_digest) - + sh_desc_update_offset; + + ctx->enginectx.op.do_one_request = ahash_do_one_req; crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct caam_hash_state)); @@ -1791,7 +1862,8 @@ static void caam_hash_cra_exit(struct crypto_tfm *tfm) struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); dma_unmap_single_attrs(ctx->jrdev, ctx->sh_desc_update_dma, - offsetof(struct caam_hash_ctx, key), + offsetof(struct caam_hash_ctx, key) - + offsetof(struct caam_hash_ctx, sh_desc_update), ctx->dir, DMA_ATTR_SKIP_CPU_SYNC); if (ctx->key_dir != DMA_NONE) dma_unmap_single_attrs(ctx->jrdev, ctx->adata.key_dma, From e44362ab03ca37a8a90bb100e7706dc85e0a3ac1 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 13 Feb 2020 10:50:54 -0600 Subject: [PATCH 075/162] crypto: img-hash - Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. 
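To make the allocation point concrete, the usual idiom pairs a flexible array with struct_size() from <linux/overflow.h>, which checks the size arithmetic for overflow (struct names reused from the example above):

	struct foo *p;

	/* sizeof(*p) + count * sizeof(p->array[0]), overflow-checked */
	p = kmalloc(struct_size(p, array, count), GFP_KERNEL);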
[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Herbert Xu --- drivers/crypto/img-hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c index 25d5227f74a1..0e25fc3087f3 100644 --- a/drivers/crypto/img-hash.c +++ b/drivers/crypto/img-hash.c @@ -103,7 +103,7 @@ struct img_hash_request_ctx { struct ahash_request fallback_req; /* Zero length buffer must remain last member of struct */ - u8 buffer[0] __aligned(sizeof(u32)); + u8 buffer[] __aligned(sizeof(u32)); }; struct img_hash_ctx { From a4a70fa91b33c3a52cb5bf2992f770cc7210687b Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 13 Feb 2020 11:21:30 -0600 Subject: [PATCH 076/162] crypto: s5p-sss - Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Acked-by: Kamil Konieczny Signed-off-by: Herbert Xu --- drivers/crypto/s5p-sss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index d66e20a2f54c..2a16800d2579 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -369,7 +369,7 @@ struct s5p_hash_reqctx { bool error; u32 bufcnt; - u8 buffer[0]; + u8 buffer[]; }; /** From ff462ddfd95b915345c3c7c037c3bfafdc58bae7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 15 Feb 2020 06:14:16 +0000 Subject: [PATCH 077/162] crypto: chelsio - Endianness bug in create_authenc_wr kctx_len = (ntohl(KEY_CONTEXT_CTX_LEN_V(aeadctx->key_ctx_hdr)) << 4) - sizeof(chcr_req->key_ctx); can't possibly be endian-safe. Look: ->key_ctx_hdr is __be32. And KEY_CONTEXT_CTX_LEN_V is "shift up by 24 bits". On little-endian hosts it sees b0 b1 b2 b3 in memory, interprets that into b0 + (b1 << 8) + (b2 << 16) + (b3 << 24), shifts up by 24, resulting in b0 << 24, does ntohl (byteswap on l-e), gets b0 and shifts that up by 4. So we get b0 * 16 - sizeof(...). Sounds reasonable, but on b-e we get b3 + (b2 << 8) + (b1 << 16) + (b0 << 24), shift up by 24, yielding b3 << 24, do ntohl (no-op on b-e) and then shift up by 4. Resulting in b3 << 28 - sizeof(...), i.e. slightly under b3 * 256M. Then we increase it some more and pass to alloc_skb() as size. Somehow I doubt that we really want a quarter-gigabyte skb allocation here...
Note that when you are building those values in #define FILL_KEY_CTX_HDR(ck_size, mk_size, d_ck, opad, ctx_len) \ htonl(KEY_CONTEXT_VALID_V(1) | \ KEY_CONTEXT_CK_SIZE_V((ck_size)) | \ KEY_CONTEXT_MK_SIZE_V(mk_size) | \ KEY_CONTEXT_DUAL_CK_V((d_ck)) | \ KEY_CONTEXT_OPAD_PRESENT_V((opad)) | \ KEY_CONTEXT_SALT_PRESENT_V(1) | \ KEY_CONTEXT_CTX_LEN_V((ctx_len))) ctx_len ends up in the first octet (i.e. b0 in the above), which matches the current behaviour on l-e. If that's the intent, this thing should've been kctx_len = (KEY_CONTEXT_CTX_LEN_G(ntohl(aeadctx->key_ctx_hdr)) << 4) - sizeof(chcr_req->key_ctx); instead - fetch after ntohl() we get (b0 << 24) + (b1 << 16) + (b2 << 8) + b3, shift it down by 24 (b0), resulting in b0 * 16 - sizeof(...) both on l-e and on b-e. PS: when sparse warns you about endianness problems, it might be worth checking if there really is something wrong. And I don't mean "slap __force cast on it"... Signed-off-by: Al Viro Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_algo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index 02b0ddb785a4..96c1b5fc9081 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -2369,7 +2369,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, snents = sg_nents_xlen(req->src, req->assoclen + req->cryptlen, CHCR_SRC_SG_SIZE, 0); dst_size = get_space_for_phys_dsgl(dnents); - kctx_len = (ntohl(KEY_CONTEXT_CTX_LEN_V(aeadctx->key_ctx_hdr)) << 4) + kctx_len = (KEY_CONTEXT_CTX_LEN_G(ntohl(aeadctx->key_ctx_hdr)) << 4) - sizeof(chcr_req->key_ctx); transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size); reqctx->imm = (transhdr_len + req->assoclen + req->cryptlen) < From 057a0fb649f3bfcd3048e22e15fa613026076119 Mon Sep 17 00:00:00 2001 From: Kalyani Akula Date: Mon, 17 Feb 2020 15:56:41 +0530 Subject: [PATCH 078/162] firmware: xilinx: Add ZynqMP aes API for AES functionality Add ZynqMP firmware AES API to perform encryption/decryption of given data. Signed-off-by: Kalyani Akula Acked-by: Michal Simek Signed-off-by: Herbert Xu --- drivers/firmware/xilinx/zynqmp.c | 25 +++++++++++++++++++++++++ include/linux/firmware/xlnx-zynqmp.h | 2 ++ 2 files changed, 27 insertions(+) diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c index ecc339d846de..1dea880b2397 100644 --- a/drivers/firmware/xilinx/zynqmp.c +++ b/drivers/firmware/xilinx/zynqmp.c @@ -707,6 +707,30 @@ static int zynqmp_pm_set_requirement(const u32 node, const u32 capabilities, qos, ack, NULL); } +/** + * zynqmp_pm_aes - Access AES hardware to encrypt/decrypt the data using + * AES-GCM core. + * @address: Address of the AesParams structure. + * @out: Returned output value + * + * Return: Returns status, either success or error code.
+ */ +static int zynqmp_pm_aes_engine(const u64 address, u32 *out) +{ + u32 ret_payload[PAYLOAD_ARG_CNT]; + int ret; + + if (!out) + return -EINVAL; + + ret = zynqmp_pm_invoke_fn(PM_SECURE_AES, upper_32_bits(address), + lower_32_bits(address), + 0, 0, ret_payload); + *out = ret_payload[1]; + + return ret; +} + static const struct zynqmp_eemi_ops eemi_ops = { .get_api_version = zynqmp_pm_get_api_version, .get_chipid = zynqmp_pm_get_chipid, @@ -730,6 +754,7 @@ static const struct zynqmp_eemi_ops eemi_ops = { .set_requirement = zynqmp_pm_set_requirement, .fpga_load = zynqmp_pm_fpga_load, .fpga_get_status = zynqmp_pm_fpga_get_status, + .aes = zynqmp_pm_aes_engine, }; /** diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 2cd12ebd6826..8acace459351 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -82,6 +82,7 @@ enum pm_api_id { PM_CLOCK_GETRATE, PM_CLOCK_SETPARENT, PM_CLOCK_GETPARENT, + PM_SECURE_AES = 47, PM_FEATURE_CHECK = 63, PM_API_MAX, }; @@ -315,6 +316,7 @@ struct zynqmp_eemi_ops { const u32 capabilities, const u32 qos, const enum zynqmp_pm_request_ack ack); + int (*aes)(const u64 address, u32 *out); }; int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 arg0, u32 arg1, From 4c4f3f33838faa740f97aa4ab5ab471684919003 Mon Sep 17 00:00:00 2001 From: Kalyani Akula Date: Mon, 17 Feb 2020 15:56:42 +0530 Subject: [PATCH 079/162] dt-bindings: crypto: Add bindings for ZynqMP AES-GCM driver Add documentation to describe Xilinx ZynqMP AES-GCM driver bindings. Signed-off-by: Kalyani Akula Reviewed-by: Rob Herring Acked-by: Michal Simek Signed-off-by: Herbert Xu --- .../bindings/crypto/xlnx,zynqmp-aes.yaml | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 Documentation/devicetree/bindings/crypto/xlnx,zynqmp-aes.yaml diff --git a/Documentation/devicetree/bindings/crypto/xlnx,zynqmp-aes.yaml b/Documentation/devicetree/bindings/crypto/xlnx,zynqmp-aes.yaml new file mode 100644 index 000000000000..55dd6e3d270d --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/xlnx,zynqmp-aes.yaml @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/crypto/xlnx,zynqmp-aes.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Xilinx ZynqMP AES-GCM Hardware Accelerator Device Tree Bindings + +maintainers: + - Kalyani Akula + - Michal Simek + +description: | + The ZynqMP AES-GCM hardened cryptographic accelerator is used to + encrypt or decrypt the data with provided key and initialization vector. + +properties: + compatible: + const: xlnx,zynqmp-aes + +required: + - compatible + +additionalProperties: false + +examples: + - | + firmware { + zynqmp_firmware: zynqmp-firmware { + compatible = "xlnx,zynqmp-firmware"; + method = "smc"; + xlnx_aes: zynqmp-aes { + compatible = "xlnx,zynqmp-aes"; + }; + }; + }; +... From 4d96f7d48131fefe30d7c1d1e2a23ef37164dbf5 Mon Sep 17 00:00:00 2001 From: Kalyani Akula Date: Mon, 17 Feb 2020 15:56:43 +0530 Subject: [PATCH 080/162] crypto: xilinx - Add Xilinx AES driver This patch adds AES driver support for the Xilinx ZynqMP SoC. 
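For context, a hedged sketch of how a kernel caller can drive the EEMI .aes op introduced in the firmware patch above; the wrapper name here is hypothetical and the error mapping is simplified, with the driver below carrying the authoritative usage:

	#include <linux/dma-mapping.h>
	#include <linux/err.h>
	#include <linux/firmware/xlnx-zynqmp.h>

	static int zynqmp_aes_fw_call(dma_addr_t hw_req_dma)
	{
		const struct zynqmp_eemi_ops *ops = zynqmp_pm_get_eemi_ops();
		u32 status;
		int ret;

		if (IS_ERR(ops) || !ops->aes)
			return -ENOTSUPP;

		/* The firmware reads a request descriptor at hw_req_dma and
		 * reports the AES engine result in 'status'. */
		ret = ops->aes(hw_req_dma, &status);
		if (ret)
			return ret;

		return status ? -EIO : 0;
	}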
Signed-off-by: Mohan Marutirao Dhanawade Signed-off-by: Kalyani Akula Acked-by: Michal Simek Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 12 + drivers/crypto/Makefile | 1 + drivers/crypto/xilinx/Makefile | 2 + drivers/crypto/xilinx/zynqmp-aes-gcm.c | 457 +++++++++++++++++++++++++ 4 files changed, 472 insertions(+) create mode 100644 drivers/crypto/xilinx/Makefile create mode 100644 drivers/crypto/xilinx/zynqmp-aes-gcm.c diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 052d3ff7fb20..f81d4cf86143 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -754,6 +754,18 @@ config CRYPTO_DEV_ROCKCHIP This driver interfaces with the hardware crypto accelerator. Supporting cbc/ecb chainmode, and aes/des/des3_ede cipher mode. +config CRYPTO_DEV_ZYNQMP_AES + tristate "Support for Xilinx ZynqMP AES hw accelerator" + depends on ZYNQMP_FIRMWARE || COMPILE_TEST + select CRYPTO_AES + select CRYPTO_ENGINE + select CRYPTO_AEAD + help + Xilinx ZynqMP has AES-GCM engine used for symmetric key + encryption and decryption. This driver interfaces with AES hw + accelerator. Select this if you want to use the ZynqMP module + for AES algorithms. + config CRYPTO_DEV_MEDIATEK tristate "MediaTek's EIP97 Cryptographic Engine driver" depends on (ARM && ARCH_MEDIATEK) || COMPILE_TEST diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 40229d499476..d505d16ec00e 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -47,5 +47,6 @@ obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/ obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += inside-secure/ obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) += axis/ +obj-$(CONFIG_CRYPTO_DEV_ZYNQMP_AES) += xilinx/ obj-y += hisilicon/ obj-$(CONFIG_CRYPTO_DEV_AMLOGIC_GXL) += amlogic/ diff --git a/drivers/crypto/xilinx/Makefile b/drivers/crypto/xilinx/Makefile new file mode 100644 index 000000000000..534e32daf76a --- /dev/null +++ b/drivers/crypto/xilinx/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_CRYPTO_DEV_ZYNQMP_AES) += zynqmp-aes-gcm.o diff --git a/drivers/crypto/xilinx/zynqmp-aes-gcm.c b/drivers/crypto/xilinx/zynqmp-aes-gcm.c new file mode 100644 index 000000000000..09f7f468eef8 --- /dev/null +++ b/drivers/crypto/xilinx/zynqmp-aes-gcm.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Xilinx ZynqMP AES Driver. + * Copyright (c) 2020 Xilinx Inc. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#define ZYNQMP_DMA_BIT_MASK 32U + +#define ZYNQMP_AES_KEY_SIZE AES_KEYSIZE_256 +#define ZYNQMP_AES_AUTH_SIZE 16U +#define ZYNQMP_KEY_SRC_SEL_KEY_LEN 1U +#define ZYNQMP_AES_BLK_SIZE 1U +#define ZYNQMP_AES_MIN_INPUT_BLK_SIZE 4U +#define ZYNQMP_AES_WORD_LEN 4U + +#define ZYNQMP_AES_GCM_TAG_MISMATCH_ERR 0x01 +#define ZYNQMP_AES_WRONG_KEY_SRC_ERR 0x13 +#define ZYNQMP_AES_PUF_NOT_PROGRAMMED 0xE300 + +enum zynqmp_aead_op { + ZYNQMP_AES_DECRYPT = 0, + ZYNQMP_AES_ENCRYPT +}; + +enum zynqmp_aead_keysrc { + ZYNQMP_AES_KUP_KEY = 0, + ZYNQMP_AES_DEV_KEY, + ZYNQMP_AES_PUF_KEY +}; + +struct zynqmp_aead_drv_ctx { + union { + struct aead_alg aead; + } alg; + struct device *dev; + struct crypto_engine *engine; + const struct zynqmp_eemi_ops *eemi_ops; +}; + +struct zynqmp_aead_hw_req { + u64 src; + u64 iv; + u64 key; + u64 dst; + u64 size; + u64 op; + u64 keysrc; +}; + +struct zynqmp_aead_tfm_ctx { + struct crypto_engine_ctx engine_ctx; + struct device *dev; + u8 key[ZYNQMP_AES_KEY_SIZE]; + u8 *iv; + u32 keylen; + u32 authsize; + enum zynqmp_aead_keysrc keysrc; + struct crypto_aead *fbk_cipher; +}; + +struct zynqmp_aead_req_ctx { + enum zynqmp_aead_op op; +}; + +static int zynqmp_aes_aead_cipher(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct zynqmp_aead_tfm_ctx *tfm_ctx = crypto_aead_ctx(aead); + struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req); + struct device *dev = tfm_ctx->dev; + struct aead_alg *alg = crypto_aead_alg(aead); + struct zynqmp_aead_drv_ctx *drv_ctx; + struct zynqmp_aead_hw_req *hwreq; + dma_addr_t dma_addr_data, dma_addr_hw_req; + unsigned int data_size; + unsigned int status; + size_t dma_size; + char *kbuf; + int err; + + drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead); + + if (!drv_ctx->eemi_ops->aes) + return -ENOTSUPP; + + if (tfm_ctx->keysrc == ZYNQMP_AES_KUP_KEY) + dma_size = req->cryptlen + ZYNQMP_AES_KEY_SIZE + + GCM_AES_IV_SIZE; + else + dma_size = req->cryptlen + GCM_AES_IV_SIZE; + + kbuf = dma_alloc_coherent(dev, dma_size, &dma_addr_data, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + hwreq = dma_alloc_coherent(dev, sizeof(struct zynqmp_aead_hw_req), + &dma_addr_hw_req, GFP_KERNEL); + if (!hwreq) { + dma_free_coherent(dev, dma_size, kbuf, dma_addr_data); + return -ENOMEM; + } + + data_size = req->cryptlen; + scatterwalk_map_and_copy(kbuf, req->src, 0, req->cryptlen, 0); + memcpy(kbuf + data_size, req->iv, GCM_AES_IV_SIZE); + + hwreq->src = dma_addr_data; + hwreq->dst = dma_addr_data; + hwreq->iv = hwreq->src + data_size; + hwreq->keysrc = tfm_ctx->keysrc; + hwreq->op = rq_ctx->op; + + if (hwreq->op == ZYNQMP_AES_ENCRYPT) + hwreq->size = data_size; + else + hwreq->size = data_size - ZYNQMP_AES_AUTH_SIZE; + + if (hwreq->keysrc == ZYNQMP_AES_KUP_KEY) { + memcpy(kbuf + data_size + GCM_AES_IV_SIZE, + tfm_ctx->key, ZYNQMP_AES_KEY_SIZE); + + hwreq->key = hwreq->src + data_size + GCM_AES_IV_SIZE; + } else { + hwreq->key = 0; + } + + drv_ctx->eemi_ops->aes(dma_addr_hw_req, &status); + + if (status) { + switch (status) { + case ZYNQMP_AES_GCM_TAG_MISMATCH_ERR: + dev_err(dev, "ERROR: Gcm Tag mismatch\n"); + break; + case ZYNQMP_AES_WRONG_KEY_SRC_ERR: + dev_err(dev, "ERROR: Wrong KeySrc, enable secure mode\n"); + break; + case ZYNQMP_AES_PUF_NOT_PROGRAMMED: + dev_err(dev, "ERROR: PUF is not registered\n"); + break; + default: + dev_err(dev, "ERROR: Unknown error\n"); + break; + } + err = -status; + } else { + if 
(hwreq->op == ZYNQMP_AES_ENCRYPT) + data_size = data_size + ZYNQMP_AES_AUTH_SIZE; + else + data_size = data_size - ZYNQMP_AES_AUTH_SIZE; + + sg_copy_from_buffer(req->dst, sg_nents(req->dst), + kbuf, data_size); + err = 0; + } + + if (kbuf) { + memzero_explicit(kbuf, dma_size); + dma_free_coherent(dev, dma_size, kbuf, dma_addr_data); + } + if (hwreq) { + memzero_explicit(hwreq, sizeof(struct zynqmp_aead_hw_req)); + dma_free_coherent(dev, sizeof(struct zynqmp_aead_hw_req), + hwreq, dma_addr_hw_req); + } + return err; +} + +static int zynqmp_fallback_check(struct zynqmp_aead_tfm_ctx *tfm_ctx, + struct aead_request *req) +{ + int need_fallback = 0; + struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req); + + if (tfm_ctx->authsize != ZYNQMP_AES_AUTH_SIZE) + need_fallback = 1; + + if (tfm_ctx->keysrc == ZYNQMP_AES_KUP_KEY && + tfm_ctx->keylen != ZYNQMP_AES_KEY_SIZE) { + need_fallback = 1; + } + if (req->assoclen != 0 || + req->cryptlen < ZYNQMP_AES_MIN_INPUT_BLK_SIZE) { + need_fallback = 1; + } + if ((req->cryptlen % ZYNQMP_AES_WORD_LEN) != 0) + need_fallback = 1; + + if (rq_ctx->op == ZYNQMP_AES_DECRYPT && + req->cryptlen <= ZYNQMP_AES_AUTH_SIZE) { + need_fallback = 1; + } + return need_fallback; +} + +static int zynqmp_handle_aes_req(struct crypto_engine *engine, + void *req) +{ + struct aead_request *areq = + container_of(req, struct aead_request, base); + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct zynqmp_aead_tfm_ctx *tfm_ctx = crypto_aead_ctx(aead); + struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(areq); + struct aead_request *subreq = aead_request_ctx(req); + int need_fallback; + int err; + + need_fallback = zynqmp_fallback_check(tfm_ctx, areq); + + if (need_fallback) { + aead_request_set_tfm(subreq, tfm_ctx->fbk_cipher); + + aead_request_set_callback(subreq, areq->base.flags, + NULL, NULL); + aead_request_set_crypt(subreq, areq->src, areq->dst, + areq->cryptlen, areq->iv); + aead_request_set_ad(subreq, areq->assoclen); + if (rq_ctx->op == ZYNQMP_AES_ENCRYPT) + err = crypto_aead_encrypt(subreq); + else + err = crypto_aead_decrypt(subreq); + } else { + err = zynqmp_aes_aead_cipher(areq); + } + + crypto_finalize_aead_request(engine, areq, err); + return 0; +} + +static int zynqmp_aes_aead_setkey(struct crypto_aead *aead, const u8 *key, + unsigned int keylen) +{ + struct crypto_tfm *tfm = crypto_aead_tfm(aead); + struct zynqmp_aead_tfm_ctx *tfm_ctx = + (struct zynqmp_aead_tfm_ctx *)crypto_tfm_ctx(tfm); + unsigned char keysrc; + + if (keylen == ZYNQMP_KEY_SRC_SEL_KEY_LEN) { + keysrc = *key; + if (keysrc == ZYNQMP_AES_KUP_KEY || + keysrc == ZYNQMP_AES_DEV_KEY || + keysrc == ZYNQMP_AES_PUF_KEY) { + tfm_ctx->keysrc = (enum zynqmp_aead_keysrc)keysrc; + } else { + tfm_ctx->keylen = keylen; + } + } else { + tfm_ctx->keylen = keylen; + if (keylen == ZYNQMP_AES_KEY_SIZE) { + tfm_ctx->keysrc = ZYNQMP_AES_KUP_KEY; + memcpy(tfm_ctx->key, key, keylen); + } + } + + tfm_ctx->fbk_cipher->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + tfm_ctx->fbk_cipher->base.crt_flags |= (aead->base.crt_flags & + CRYPTO_TFM_REQ_MASK); + + return crypto_aead_setkey(tfm_ctx->fbk_cipher, key, keylen); +} + +static int zynqmp_aes_aead_setauthsize(struct crypto_aead *aead, + unsigned int authsize) +{ + struct crypto_tfm *tfm = crypto_aead_tfm(aead); + struct zynqmp_aead_tfm_ctx *tfm_ctx = + (struct zynqmp_aead_tfm_ctx *)crypto_tfm_ctx(tfm); + + tfm_ctx->authsize = authsize; + return crypto_aead_setauthsize(tfm_ctx->fbk_cipher, authsize); +} + +static int zynqmp_aes_aead_encrypt(struct aead_request *req) 
+{ + struct zynqmp_aead_drv_ctx *drv_ctx; + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct aead_alg *alg = crypto_aead_alg(aead); + struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req); + + rq_ctx->op = ZYNQMP_AES_ENCRYPT; + drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead); + + return crypto_transfer_aead_request_to_engine(drv_ctx->engine, req); +} + +static int zynqmp_aes_aead_decrypt(struct aead_request *req) +{ + struct zynqmp_aead_drv_ctx *drv_ctx; + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct aead_alg *alg = crypto_aead_alg(aead); + struct zynqmp_aead_req_ctx *rq_ctx = aead_request_ctx(req); + + rq_ctx->op = ZYNQMP_AES_DECRYPT; + drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead); + + return crypto_transfer_aead_request_to_engine(drv_ctx->engine, req); +} + +static int zynqmp_aes_aead_init(struct crypto_aead *aead) +{ + struct crypto_tfm *tfm = crypto_aead_tfm(aead); + struct zynqmp_aead_tfm_ctx *tfm_ctx = + (struct zynqmp_aead_tfm_ctx *)crypto_tfm_ctx(tfm); + struct zynqmp_aead_drv_ctx *drv_ctx; + struct aead_alg *alg = crypto_aead_alg(aead); + + drv_ctx = container_of(alg, struct zynqmp_aead_drv_ctx, alg.aead); + tfm_ctx->dev = drv_ctx->dev; + + tfm_ctx->engine_ctx.op.do_one_request = zynqmp_handle_aes_req; + tfm_ctx->engine_ctx.op.prepare_request = NULL; + tfm_ctx->engine_ctx.op.unprepare_request = NULL; + + tfm_ctx->fbk_cipher = crypto_alloc_aead(drv_ctx->alg.aead.base.cra_name, + 0, + CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(tfm_ctx->fbk_cipher)) { + pr_err("%s() Error: failed to allocate fallback for %s\n", + __func__, drv_ctx->alg.aead.base.cra_name); + return PTR_ERR(tfm_ctx->fbk_cipher); + } + + crypto_aead_set_reqsize(aead, + max(sizeof(struct zynqmp_aead_req_ctx), + sizeof(struct aead_request) + + crypto_aead_reqsize(tfm_ctx->fbk_cipher))); + return 0; +} + +static void zynqmp_aes_aead_exit(struct crypto_aead *aead) +{ + struct crypto_tfm *tfm = crypto_aead_tfm(aead); + struct zynqmp_aead_tfm_ctx *tfm_ctx = + (struct zynqmp_aead_tfm_ctx *)crypto_tfm_ctx(tfm); + + if (tfm_ctx->fbk_cipher) { + crypto_free_aead(tfm_ctx->fbk_cipher); + tfm_ctx->fbk_cipher = NULL; + } + memzero_explicit(tfm_ctx, sizeof(struct zynqmp_aead_tfm_ctx)); +} + +static struct zynqmp_aead_drv_ctx aes_drv_ctx = { + .alg.aead = { + .setkey = zynqmp_aes_aead_setkey, + .setauthsize = zynqmp_aes_aead_setauthsize, + .encrypt = zynqmp_aes_aead_encrypt, + .decrypt = zynqmp_aes_aead_decrypt, + .init = zynqmp_aes_aead_init, + .exit = zynqmp_aes_aead_exit, + .ivsize = GCM_AES_IV_SIZE, + .maxauthsize = ZYNQMP_AES_AUTH_SIZE, + .base = { + .cra_name = "gcm(aes)", + .cra_driver_name = "xilinx-zynqmp-aes-gcm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_AEAD | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = ZYNQMP_AES_BLK_SIZE, + .cra_ctxsize = sizeof(struct zynqmp_aead_tfm_ctx), + .cra_module = THIS_MODULE, + } + } +}; + +static int zynqmp_aes_aead_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + int err; + + /* ZynqMP AES driver supports only one instance */ + if (!aes_drv_ctx.dev) + aes_drv_ctx.dev = dev; + else + return -ENODEV; + + aes_drv_ctx.eemi_ops = zynqmp_pm_get_eemi_ops(); + if (IS_ERR(aes_drv_ctx.eemi_ops)) { + dev_err(dev, "Failed to get ZynqMP EEMI interface\n"); + return PTR_ERR(aes_drv_ctx.eemi_ops); + } + + err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(ZYNQMP_DMA_BIT_MASK)); + if (err < 0) { + dev_err(dev, "No usable DMA configuration\n"); + 
return err; + } + + aes_drv_ctx.engine = crypto_engine_alloc_init(dev, 1); + if (!aes_drv_ctx.engine) { + dev_err(dev, "Cannot alloc AES engine\n"); + err = -ENOMEM; + goto err_engine; + } + + err = crypto_engine_start(aes_drv_ctx.engine); + if (err) { + dev_err(dev, "Cannot start AES engine\n"); + goto err_engine; + } + + err = crypto_register_aead(&aes_drv_ctx.alg.aead); + if (err < 0) { + dev_err(dev, "Failed to register AEAD alg.\n"); + goto err_aead; + } + return 0; + +err_aead: + crypto_unregister_aead(&aes_drv_ctx.alg.aead); + +err_engine: + if (aes_drv_ctx.engine) + crypto_engine_exit(aes_drv_ctx.engine); + + return err; +} + +static int zynqmp_aes_aead_remove(struct platform_device *pdev) +{ + crypto_engine_exit(aes_drv_ctx.engine); + crypto_unregister_aead(&aes_drv_ctx.alg.aead); + + return 0; +} + +static const struct of_device_id zynqmp_aes_dt_ids[] = { + { .compatible = "xlnx,zynqmp-aes" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, zynqmp_aes_dt_ids); + +static struct platform_driver zynqmp_aes_driver = { + .probe = zynqmp_aes_aead_probe, + .remove = zynqmp_aes_aead_remove, + .driver = { + .name = "zynqmp-aes", + .of_match_table = zynqmp_aes_dt_ids, + }, +}; + +module_platform_driver(zynqmp_aes_driver); +MODULE_LICENSE("GPL"); From f747426d6b3516b228a7e2e0354cb5e3c55ba8e5 Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Mon, 17 Feb 2020 19:09:27 +0530 Subject: [PATCH 081/162] MAINTAINERS: Update maintainers for chelsio crypto drivers This updates the maintainer list for chelsio crypto drivers. Signed-off-by: Ayush Sawal Signed-off-by: Herbert Xu --- MAINTAINERS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 38fe2f3f7b6f..12765a0b08e2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4542,7 +4542,9 @@ S: Supported F: drivers/scsi/cxgbi/cxgb3i CXGB4 CRYPTO DRIVER (chcr) -M: Atul Gupta +M: Ayush Sawal +M: Vinay Kumar Yadav +M: Rohit Maheshwari L: linux-crypto@vger.kernel.org W: http://www.chelsio.com S: Supported From 6ff78ffaddd807cd458ec3fdcc714d3a79414e5e Mon Sep 17 00:00:00 2001 From: Devulapally Shiva Krishna Date: Wed, 19 Feb 2020 18:43:57 +0530 Subject: [PATCH 082/162] crypto: chelsio - un-register crypto algorithms When a PCI device is removed, cxgb4 (LLD) will notify chcr (ULD). In case it is the last PCI device, chcr should un-register all the crypto algorithms. Signed-off-by: Devulapally Shiva Krishna Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c index 507ba20f0874..6e02254c007a 100644 --- a/drivers/crypto/chelsio/chcr_core.c +++ b/drivers/crypto/chelsio/chcr_core.c @@ -271,6 +271,8 @@ static int chcr_uld_state_change(void *handle, enum cxgb4_state state) case CXGB4_STATE_DETACH: chcr_detach_device(u_ctx); + if (!atomic_read(&drv_data.dev_count)) + stop_crypto(); break; case CXGB4_STATE_START_RECOVERY: From 2fdddaf089ef4917e15eefacca2fa2e91e819683 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 21 Feb 2020 21:55:15 +0800 Subject: [PATCH 083/162] crypto: md5 - remove unused macros crypto/md5.c:26:0: warning: macro "MD5_DIGEST_WORDS" is not used [-Wunused-macros] crypto/md5.c:27:0: warning: macro "MD5_MESSAGE_BYTES" is not used [-Wunused-macros] They are never used since commit 3c7eb3cc8360 ("md5: remove from lib and only live in crypto").
Signed-off-by: YueHaibing Signed-off-by: Herbert Xu --- crypto/md5.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/crypto/md5.c b/crypto/md5.c index 22dc60bc0437..72c0c46fb5ee 100644 --- a/crypto/md5.c +++ b/crypto/md5.c @@ -23,9 +23,6 @@ #include #include -#define MD5_DIGEST_WORDS 4 -#define MD5_MESSAGE_BYTES 64 - const u8 md5_zero_message_hash[MD5_DIGEST_SIZE] = { 0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04, 0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e, From ba02b35239f7ea08ee9959689a66fee8782c1c8c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 22 Feb 2020 12:38:55 +1100 Subject: [PATCH 084/162] hwrng: omap3-rom - Include linux/io.h for virt_to_phys This patch adds linux/io.h to the header list to ensure that we get virt_to_phys on all architectures. Signed-off-by: Herbert Xu --- drivers/char/hw_random/omap3-rom-rng.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c index e08a8887e718..a431c5cbe2be 100644 --- a/drivers/char/hw_random/omap3-rom-rng.c +++ b/drivers/char/hw_random/omap3-rom-rng.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include From 1b44f93eb3e51f97d06a92eb39ab1ad031851f74 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 22 Feb 2020 14:24:09 +0000 Subject: [PATCH 085/162] crypto: hisilicon - remove redundant assignment of pointer ctx Pointer ctx is being re-assigned with the same value as it was initialized with. The second assignment is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_crypto.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index a2cfcc9ccd94..acd15507eb8a 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -447,7 +447,6 @@ static int sec_skcipher_init(struct crypto_skcipher *tfm) struct sec_ctx *ctx = crypto_skcipher_ctx(tfm); int ret; - ctx = crypto_skcipher_ctx(tfm); ctx->alg_type = SEC_SKCIPHER; crypto_skcipher_set_reqsize(tfm, sizeof(struct sec_req)); ctx->c_ctx.ivsize = crypto_skcipher_ivsize(tfm); From bed44d0c05d8efdda1ae98b1c7aa7b116abeadb2 Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Mon, 24 Feb 2020 09:12:32 +0530 Subject: [PATCH 086/162] crypto: chelsio - Recalculate iv only if it is needed Recalculate iv only if it is needed i.e. if the last req to hw was partial for aes-xts. 
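In sketch form (simplified; the hunk below carries the authoritative code), the XTS branch of chcr_final_cipher_iv() becomes:

	if (!reqctx->partial_req)
		/* whole request went out in one WR: saved IV is still valid */
		memcpy(iv, reqctx->iv, AES_BLOCK_SIZE);
	else
		/* last WR was partial: recompute the tweak */
		ret = chcr_update_tweak(req, iv, 1);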
Signed-off-by: Ayush Sawal Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_algo.c | 12 ++++++++++-- drivers/crypto/chelsio/chcr_crypto.h | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index 96c1b5fc9081..17ce6970dab4 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -1086,8 +1086,12 @@ static int chcr_final_cipher_iv(struct skcipher_request *req, if (subtype == CRYPTO_ALG_SUB_TYPE_CTR) ctr_add_iv(iv, req->iv, DIV_ROUND_UP(reqctx->processed, AES_BLOCK_SIZE)); - else if (subtype == CRYPTO_ALG_SUB_TYPE_XTS) - ret = chcr_update_tweak(req, iv, 1); + else if (subtype == CRYPTO_ALG_SUB_TYPE_XTS) { + if (!reqctx->partial_req) + memcpy(iv, reqctx->iv, AES_BLOCK_SIZE); + else + ret = chcr_update_tweak(req, iv, 1); + } else if (subtype == CRYPTO_ALG_SUB_TYPE_CBC) { /*Already updated for Decrypt*/ if (!reqctx->op) @@ -1199,6 +1203,7 @@ static int process_cipher(struct skcipher_request *req, int bytes, err = -EINVAL; reqctx->processed = 0; + reqctx->partial_req = 0; if (!req->iv) goto error; if ((ablkctx->enckey_len == 0) || (ivsize > AES_BLOCK_SIZE) || @@ -1289,6 +1294,7 @@ static int process_cipher(struct skcipher_request *req, } reqctx->processed = bytes; reqctx->last_req_len = bytes; + reqctx->partial_req = !!(req->cryptlen - reqctx->processed); return 0; unmap: @@ -1300,6 +1306,7 @@ error: static int chcr_aes_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req); struct chcr_context *ctx; struct chcr_dev *dev = c_ctx(tfm)->dev; struct sk_buff *skb = NULL; @@ -1329,6 +1336,7 @@ static int chcr_aes_encrypt(struct skcipher_request *req) CRYPTO_ALG_SUB_TYPE_CBC && req->base.flags == CRYPTO_TFM_REQ_MAY_SLEEP ) { ctx=c_ctx(tfm); + reqctx->partial_req = 1; wait_for_completion(&ctx->cbc_aes_aio_done); } return isfull ? -EBUSY : -EINPROGRESS; diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h index 9207d88c5538..dbb7e13bb409 100644 --- a/drivers/crypto/chelsio/chcr_crypto.h +++ b/drivers/crypto/chelsio/chcr_crypto.h @@ -291,6 +291,7 @@ struct chcr_skcipher_req_ctx { struct scatterlist *dstsg; unsigned int processed; unsigned int last_req_len; + unsigned int partial_req; struct scatterlist *srcsg; unsigned int src_ofst; unsigned int dst_ofst; From 567be3a5d2270fb1971212f704240d6235a2c060 Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Mon, 24 Feb 2020 09:12:33 +0530 Subject: [PATCH 087/162] crypto: chelsio - Use multiple txq/rxq per tfm to process the requests This patch enables chcr to use multiple txq/rxq per tfm to process the crypto requests. The txq/rxq are selected based on cpu core-id. 
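As a rough sketch of the mapping (identifiers from the patch; the queue counts are made-up illustration values):

	/* Each request path now derives its queues from the submitting CPU. */
	unsigned int cpu = get_cpu();

	reqctx->txqidx = cpu % ctx->ntxq;	/* tx queue for this CPU */
	reqctx->rxqidx = cpu % ctx->nrxq;	/* rx queue for this CPU */
	put_cpu();

	/* The hardware channel follows from the queue index: e.g. with
	 * nrxq = 8 and nchan = 2, rxq_perchan = 4, so CPU 5 -> rxqidx 5 ->
	 * rx_channel_id = 5 / 4 = 1. */
	rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan;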
Signed-off-by: Ayush Sawal Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_algo.c | 325 +++++++++++++++++---------- drivers/crypto/chelsio/chcr_core.h | 1 - drivers/crypto/chelsio/chcr_crypto.h | 14 +- 3 files changed, 216 insertions(+), 124 deletions(-) diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index 17ce6970dab4..8952732c0b7d 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -715,6 +715,52 @@ static int chcr_cipher_fallback(struct crypto_sync_skcipher *cipher, return err; } + +static inline int get_qidxs(struct crypto_async_request *req, + unsigned int *txqidx, unsigned int *rxqidx) +{ + struct crypto_tfm *tfm = req->tfm; + int ret = 0; + + switch (tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { + case CRYPTO_ALG_TYPE_AEAD: + { + struct aead_request *aead_req = + container_of(req, struct aead_request, base); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(aead_req); + *txqidx = reqctx->txqidx; + *rxqidx = reqctx->rxqidx; + break; + } + case CRYPTO_ALG_TYPE_SKCIPHER: + { + struct skcipher_request *sk_req = + container_of(req, struct skcipher_request, base); + struct chcr_skcipher_req_ctx *reqctx = + skcipher_request_ctx(sk_req); + *txqidx = reqctx->txqidx; + *rxqidx = reqctx->rxqidx; + break; + } + case CRYPTO_ALG_TYPE_AHASH: + { + struct ahash_request *ahash_req = + container_of(req, struct ahash_request, base); + struct chcr_ahash_req_ctx *reqctx = + ahash_request_ctx(ahash_req); + *txqidx = reqctx->txqidx; + *rxqidx = reqctx->rxqidx; + break; + } + default: + ret = -EINVAL; + /* should never get here */ + BUG(); + break; + } + return ret; +} + static inline void create_wreq(struct chcr_context *ctx, struct chcr_wr *chcr_req, struct crypto_async_request *req, @@ -725,7 +771,15 @@ static inline void create_wreq(struct chcr_context *ctx, unsigned int lcb) { struct uld_ctx *u_ctx = ULD_CTX(ctx); - int qid = u_ctx->lldi.rxq_ids[ctx->rx_qidx]; + unsigned int tx_channel_id, rx_channel_id; + unsigned int txqidx = 0, rxqidx = 0; + unsigned int qid, fid; + + get_qidxs(req, &txqidx, &rxqidx); + qid = u_ctx->lldi.rxq_ids[rxqidx]; + fid = u_ctx->lldi.rxq_ids[0]; + tx_channel_id = txqidx / ctx->txq_perchan; + rx_channel_id = rxqidx / ctx->rxq_perchan; chcr_req->wreq.op_to_cctx_size = FILL_WR_OP_CCTX_SIZE; @@ -734,15 +788,12 @@ static inline void create_wreq(struct chcr_context *ctx, chcr_req->wreq.len16_pkd = htonl(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP(len16, 16))); chcr_req->wreq.cookie = cpu_to_be64((uintptr_t)req); - chcr_req->wreq.rx_chid_to_rx_q_id = - FILL_WR_RX_Q_ID(ctx->tx_chan_id, qid, - !!lcb, ctx->tx_qidx); + chcr_req->wreq.rx_chid_to_rx_q_id = FILL_WR_RX_Q_ID(rx_channel_id, qid, + !!lcb, txqidx); - chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->tx_chan_id, - qid); + chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(tx_channel_id, fid); chcr_req->ulptx.len = htonl((DIV_ROUND_UP(len16, 16) - - ((sizeof(chcr_req->wreq)) >> 4))); - + ((sizeof(chcr_req->wreq)) >> 4))); chcr_req->sc_imm.cmd_more = FILL_CMD_MORE(!imm); chcr_req->sc_imm.len = cpu_to_be32(sizeof(struct cpl_tx_sec_pdu) + sizeof(chcr_req->key_ctx) + sc_len); @@ -758,7 +809,8 @@ static inline void create_wreq(struct chcr_context *ctx, static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(wrparam->req); - struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(tfm)); + struct chcr_context *ctx = c_ctx(tfm); + struct ablk_ctx *ablkctx = ABLK_CTX(ctx); struct sk_buff 
*skb = NULL; struct chcr_wr *chcr_req; struct cpl_rx_phys_dsgl *phys_cpl; @@ -771,7 +823,8 @@ static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam) unsigned int kctx_len; gfp_t flags = wrparam->req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; - struct adapter *adap = padap(c_ctx(tfm)->dev); + struct adapter *adap = padap(ctx->dev); + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; nents = sg_nents_xlen(reqctx->dstsg, wrparam->bytes, CHCR_DST_SG_SIZE, reqctx->dst_ofst); @@ -791,7 +844,7 @@ static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam) } chcr_req = __skb_put_zero(skb, transhdr_len); chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(c_ctx(tfm)->tx_chan_id, 2, 1); + FILL_SEC_CPL_OP_IVINSR(rx_channel_id, 2, 1); chcr_req->sec_cpl.pldlen = htonl(IV + wrparam->bytes); chcr_req->sec_cpl.aadstart_cipherstop_hi = @@ -1112,7 +1165,7 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req, struct sk_buff *skb; struct cpl_fw6_pld *fw6_pld = (struct cpl_fw6_pld *)input; struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req); - struct cipher_wr_param wrparam; + struct cipher_wr_param wrparam; struct chcr_dev *dev = c_ctx(tfm)->dev; int bytes; @@ -1157,7 +1210,7 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req, if (get_cryptoalg_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_CTR) bytes = adjust_ctr_overflow(reqctx->iv, bytes); - wrparam.qid = u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx]; + wrparam.qid = u_ctx->lldi.rxq_ids[reqctx->rxqidx]; wrparam.req = req; wrparam.bytes = bytes; skb = create_cipher_wr(&wrparam); @@ -1167,7 +1220,7 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req, goto unmap; } skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, c_ctx(tfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx); chcr_send_wr(skb); reqctx->last_req_len = bytes; reqctx->processed += bytes; @@ -1307,39 +1360,42 @@ static int chcr_aes_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req); - struct chcr_context *ctx; struct chcr_dev *dev = c_ctx(tfm)->dev; struct sk_buff *skb = NULL; - int err, isfull = 0; + int err; struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm)); + struct chcr_context *ctx = c_ctx(tfm); + unsigned int cpu; + + cpu = get_cpu(); + reqctx->txqidx = cpu % ctx->ntxq; + reqctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); err = chcr_inc_wrcount(dev); if (err) return -ENXIO; if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - c_ctx(tfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + reqctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) { err = -ENOSPC; goto error; - } } - err = process_cipher(req, u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx], + err = process_cipher(req, u_ctx->lldi.rxq_ids[reqctx->rxqidx], &skb, CHCR_ENCRYPT_OP); if (err || !skb) return err; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, c_ctx(tfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx); chcr_send_wr(skb); if (get_cryptoalg_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_CBC && req->base.flags == CRYPTO_TFM_REQ_MAY_SLEEP ) { - ctx=c_ctx(tfm); reqctx->partial_req = 1; wait_for_completion(&ctx->cbc_aes_aio_done); } - return isfull ? 
-EBUSY : -EINPROGRESS; + return -EINPROGRESS; error: chcr_dec_wrcount(dev); return err; @@ -1348,68 +1404,58 @@ error: static int chcr_aes_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req); struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm)); struct chcr_dev *dev = c_ctx(tfm)->dev; struct sk_buff *skb = NULL; - int err, isfull = 0; + int err; + struct chcr_context *ctx = c_ctx(tfm); + unsigned int cpu; + + cpu = get_cpu(); + reqctx->txqidx = cpu % ctx->ntxq; + reqctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); err = chcr_inc_wrcount(dev); if (err) return -ENXIO; if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - c_ctx(tfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + reqctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) return -ENOSPC; - } - - err = process_cipher(req, u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx], + err = process_cipher(req, u_ctx->lldi.rxq_ids[reqctx->rxqidx], &skb, CHCR_DECRYPT_OP); if (err || !skb) return err; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, c_ctx(tfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx); chcr_send_wr(skb); - return isfull ? -EBUSY : -EINPROGRESS; + return -EINPROGRESS; } - static int chcr_device_init(struct chcr_context *ctx) { struct uld_ctx *u_ctx = NULL; - unsigned int id; - int txq_perchan, txq_idx, ntxq; - int err = 0, rxq_perchan, rxq_idx; + struct adapter *adap; + int txq_perchan, ntxq; + int err = 0, rxq_perchan; - id = smp_processor_id(); if (!ctx->dev) { u_ctx = assign_chcr_device(); if (!u_ctx) { - err = -ENXIO; pr_err("chcr device assignment fails\n"); goto out; } ctx->dev = &u_ctx->dev; + adap = padap(ctx->dev); ntxq = u_ctx->lldi.ntxq; rxq_perchan = u_ctx->lldi.nrxq / u_ctx->lldi.nchan; txq_perchan = ntxq / u_ctx->lldi.nchan; - spin_lock(&ctx->dev->lock_chcr_dev); - ctx->tx_chan_id = ctx->dev->tx_channel_id; - ctx->dev->tx_channel_id = - (ctx->dev->tx_channel_id + 1) % u_ctx->lldi.nchan; - spin_unlock(&ctx->dev->lock_chcr_dev); - rxq_idx = ctx->tx_chan_id * rxq_perchan; - rxq_idx += id % rxq_perchan; - txq_idx = ctx->tx_chan_id * txq_perchan; - txq_idx += id % txq_perchan; - ctx->rx_qidx = rxq_idx; - ctx->tx_qidx = txq_idx; - /* Channel Id used by SGE to forward packet to Host. - * Same value should be used in cpl_fw6_pld RSS_CH field - * by FW. 
Driver programs PCI channel ID to be used in fw - * at the time of queue allocation with value "pi->tx_chan" - */ - ctx->pci_chan_id = txq_idx / txq_perchan; + ctx->ntxq = ntxq; + ctx->nrxq = u_ctx->lldi.nrxq; + ctx->rxq_perchan = rxq_perchan; + ctx->txq_perchan = txq_perchan; } out: return err; @@ -1511,9 +1557,10 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req, { struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct hmac_ctx *hmacctx = HMAC_CTX(h_ctx(tfm)); + struct chcr_context *ctx = h_ctx(tfm); + struct hmac_ctx *hmacctx = HMAC_CTX(ctx); struct sk_buff *skb = NULL; - struct uld_ctx *u_ctx = ULD_CTX(h_ctx(tfm)); + struct uld_ctx *u_ctx = ULD_CTX(ctx); struct chcr_wr *chcr_req; struct ulptx_sgl *ulptx; unsigned int nents = 0, transhdr_len; @@ -1522,6 +1569,7 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req, GFP_ATOMIC; struct adapter *adap = padap(h_ctx(tfm)->dev); int error = 0; + unsigned int rx_channel_id = req_ctx->rxqidx / ctx->rxq_perchan; transhdr_len = HASH_TRANSHDR_SIZE(param->kctx_len); req_ctx->hctx_wr.imm = (transhdr_len + param->bfr_len + @@ -1539,7 +1587,8 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req, chcr_req = __skb_put_zero(skb, transhdr_len); chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(h_ctx(tfm)->tx_chan_id, 2, 0); + FILL_SEC_CPL_OP_IVINSR(rx_channel_id, 2, 0); + chcr_req->sec_cpl.pldlen = htonl(param->bfr_len + param->sg_len); chcr_req->sec_cpl.aadstart_cipherstop_hi = @@ -1602,16 +1651,22 @@ static int chcr_ahash_update(struct ahash_request *req) { struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); - struct uld_ctx *u_ctx = NULL; + struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm)); + struct chcr_context *ctx = h_ctx(rtfm); struct chcr_dev *dev = h_ctx(rtfm)->dev; struct sk_buff *skb; u8 remainder = 0, bs; unsigned int nbytes = req->nbytes; struct hash_wr_param params; - int error, isfull = 0; + int error; + unsigned int cpu; + + cpu = get_cpu(); + req_ctx->txqidx = cpu % ctx->ntxq; + req_ctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); - u_ctx = ULD_CTX(h_ctx(rtfm)); if (nbytes + req_ctx->reqlen >= bs) { remainder = (nbytes + req_ctx->reqlen) % bs; @@ -1629,12 +1684,10 @@ static int chcr_ahash_update(struct ahash_request *req) * inflight count for dev guarantees that lldi and padap is valid */ if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - h_ctx(rtfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + req_ctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) { error = -ENOSPC; goto err; - } } chcr_init_hctx_per_wr(req_ctx); @@ -1676,10 +1729,9 @@ static int chcr_ahash_update(struct ahash_request *req) } req_ctx->reqlen = remainder; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, req_ctx->txqidx); chcr_send_wr(skb); - - return isfull ? 
-EBUSY : -EINPROGRESS; + return -EINPROGRESS; unmap: chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req); err: @@ -1704,16 +1756,22 @@ static int chcr_ahash_final(struct ahash_request *req) struct chcr_dev *dev = h_ctx(rtfm)->dev; struct hash_wr_param params; struct sk_buff *skb; - struct uld_ctx *u_ctx = NULL; + struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm)); + struct chcr_context *ctx = h_ctx(rtfm); u8 bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); int error = -EINVAL; + unsigned int cpu; + + cpu = get_cpu(); + req_ctx->txqidx = cpu % ctx->ntxq; + req_ctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); error = chcr_inc_wrcount(dev); if (error) return -ENXIO; chcr_init_hctx_per_wr(req_ctx); - u_ctx = ULD_CTX(h_ctx(rtfm)); if (is_hmac(crypto_ahash_tfm(rtfm))) params.opad_needed = 1; else @@ -1753,7 +1811,7 @@ static int chcr_ahash_final(struct ahash_request *req) } req_ctx->reqlen = 0; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, req_ctx->txqidx); chcr_send_wr(skb); return -EINPROGRESS; err: @@ -1766,25 +1824,29 @@ static int chcr_ahash_finup(struct ahash_request *req) struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); struct chcr_dev *dev = h_ctx(rtfm)->dev; - struct uld_ctx *u_ctx = NULL; + struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm)); + struct chcr_context *ctx = h_ctx(rtfm); struct sk_buff *skb; struct hash_wr_param params; u8 bs; - int error, isfull = 0; + int error; + unsigned int cpu; + + cpu = get_cpu(); + req_ctx->txqidx = cpu % ctx->ntxq; + req_ctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); - u_ctx = ULD_CTX(h_ctx(rtfm)); error = chcr_inc_wrcount(dev); if (error) return -ENXIO; if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - h_ctx(rtfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + req_ctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) { error = -ENOSPC; goto err; - } } chcr_init_hctx_per_wr(req_ctx); error = chcr_hash_dma_map(&u_ctx->lldi.pdev->dev, req); @@ -1842,10 +1904,9 @@ static int chcr_ahash_finup(struct ahash_request *req) req_ctx->reqlen = 0; req_ctx->hctx_wr.processed += params.sg_len; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, req_ctx->txqidx); chcr_send_wr(skb); - - return isfull ? 
-EBUSY : -EINPROGRESS; + return -EINPROGRESS; unmap: chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req); err: @@ -1858,11 +1919,18 @@ static int chcr_ahash_digest(struct ahash_request *req) struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); struct chcr_dev *dev = h_ctx(rtfm)->dev; - struct uld_ctx *u_ctx = NULL; + struct uld_ctx *u_ctx = ULD_CTX(h_ctx(rtfm)); + struct chcr_context *ctx = h_ctx(rtfm); struct sk_buff *skb; struct hash_wr_param params; u8 bs; - int error, isfull = 0; + int error; + unsigned int cpu; + + cpu = get_cpu(); + req_ctx->txqidx = cpu % ctx->ntxq; + req_ctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); rtfm->init(req); bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); @@ -1870,14 +1938,11 @@ static int chcr_ahash_digest(struct ahash_request *req) if (error) return -ENXIO; - u_ctx = ULD_CTX(h_ctx(rtfm)); if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - h_ctx(rtfm)->tx_qidx))) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + req_ctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)))) { error = -ENOSPC; goto err; - } } chcr_init_hctx_per_wr(req_ctx); @@ -1933,9 +1998,9 @@ static int chcr_ahash_digest(struct ahash_request *req) } req_ctx->hctx_wr.processed += params.sg_len; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, req_ctx->txqidx); chcr_send_wr(skb); - return isfull ? -EBUSY : -EINPROGRESS; + return -EINPROGRESS; unmap: chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req); err: @@ -1948,14 +2013,20 @@ static int chcr_ahash_continue(struct ahash_request *req) struct chcr_ahash_req_ctx *reqctx = ahash_request_ctx(req); struct chcr_hctx_per_wr *hctx_wr = &reqctx->hctx_wr; struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); - struct uld_ctx *u_ctx = NULL; + struct chcr_context *ctx = h_ctx(rtfm); + struct uld_ctx *u_ctx = ULD_CTX(ctx); struct sk_buff *skb; struct hash_wr_param params; u8 bs; int error; + unsigned int cpu; + + cpu = get_cpu(); + reqctx->txqidx = cpu % ctx->ntxq; + reqctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); - u_ctx = ULD_CTX(h_ctx(rtfm)); get_alg_config(¶ms.alg_prm, crypto_ahash_digestsize(rtfm)); params.kctx_len = roundup(params.alg_prm.result_size, 16); if (is_hmac(crypto_ahash_tfm(rtfm))) { @@ -1995,7 +2066,7 @@ static int chcr_ahash_continue(struct ahash_request *req) } hctx_wr->processed += params.sg_len; skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx); chcr_send_wr(skb); return 0; err: @@ -2341,7 +2412,8 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, int size) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); + struct chcr_context *ctx = a_ctx(tfm); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); struct chcr_authenc_ctx *actx = AUTHENC_CTX(aeadctx); struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); struct sk_buff *skb = NULL; @@ -2357,7 +2429,8 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, int null = 0; gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? 
GFP_KERNEL : GFP_ATOMIC; - struct adapter *adap = padap(a_ctx(tfm)->dev); + struct adapter *adap = padap(ctx->dev); + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; if (req->cryptlen == 0) return NULL; @@ -2409,7 +2482,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, * to the hardware spec */ chcr_req->sec_cpl.op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(a_ctx(tfm)->tx_chan_id, 2, 1); + FILL_SEC_CPL_OP_IVINSR(rx_channel_id, 2, 1); chcr_req->sec_cpl.pldlen = htonl(req->assoclen + IV + req->cryptlen); chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( null ? 0 : 1 + IV, @@ -2585,13 +2658,14 @@ void chcr_add_aead_dst_ent(struct aead_request *req, unsigned int authsize = crypto_aead_authsize(tfm); struct chcr_context *ctx = a_ctx(tfm); u32 temp; + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; dsgl_walk_init(&dsgl_walk, phys_cpl); dsgl_walk_add_page(&dsgl_walk, IV + reqctx->b0_len, reqctx->iv_dma); temp = req->assoclen + req->cryptlen + (reqctx->op ? -authsize : authsize); dsgl_walk_add_sg(&dsgl_walk, req->dst, temp, 0); - dsgl_walk_end(&dsgl_walk, qid, ctx->pci_chan_id); + dsgl_walk_end(&dsgl_walk, qid, rx_channel_id); } void chcr_add_cipher_src_ent(struct skcipher_request *req, @@ -2626,14 +2700,14 @@ void chcr_add_cipher_dst_ent(struct skcipher_request *req, struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(wrparam->req); struct chcr_context *ctx = c_ctx(tfm); struct dsgl_walk dsgl_walk; + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; dsgl_walk_init(&dsgl_walk, phys_cpl); dsgl_walk_add_sg(&dsgl_walk, reqctx->dstsg, wrparam->bytes, reqctx->dst_ofst); reqctx->dstsg = dsgl_walk.last_sg; reqctx->dst_ofst = dsgl_walk.last_sg_len; - - dsgl_walk_end(&dsgl_walk, qid, ctx->pci_chan_id); + dsgl_walk_end(&dsgl_walk, qid, rx_channel_id); } void chcr_add_hash_src_ent(struct ahash_request *req, @@ -2831,10 +2905,12 @@ static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl, unsigned short op_type) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); + struct chcr_context *ctx = a_ctx(tfm); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); unsigned int cipher_mode = CHCR_SCMD_CIPHER_MODE_AES_CCM; unsigned int mac_mode = CHCR_SCMD_AUTH_MODE_CBCMAC; - unsigned int c_id = a_ctx(tfm)->tx_chan_id; + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; unsigned int ccm_xtra; unsigned char tag_offset = 0, auth_offset = 0; unsigned int assoclen; @@ -2855,9 +2931,7 @@ static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl, auth_offset = 0; } - - sec_cpl->op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR(c_id, - 2, 1); + sec_cpl->op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR(rx_channel_id, 2, 1); sec_cpl->pldlen = htonl(req->assoclen + IV + req->cryptlen + ccm_xtra); /* For CCM there wil be b0 always. So AAD start will be 1 always */ @@ -3000,7 +3074,8 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, int size) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); + struct chcr_context *ctx = a_ctx(tfm); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); struct sk_buff *skb = NULL; struct chcr_wr *chcr_req; @@ -3013,7 +3088,8 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, u8 *ivptr; gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? 
GFP_KERNEL : GFP_ATOMIC; - struct adapter *adap = padap(a_ctx(tfm)->dev); + struct adapter *adap = padap(ctx->dev); + unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106) assoclen = req->assoclen - 8; @@ -3055,7 +3131,7 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, //Offset of tag from end temp = (reqctx->op == CHCR_ENCRYPT_OP) ? 0 : authsize; chcr_req->sec_cpl.op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR( - a_ctx(tfm)->tx_chan_id, 2, 1); + rx_channel_id, 2, 1); chcr_req->sec_cpl.pldlen = htonl(req->assoclen + IV + req->cryptlen); chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( @@ -3603,9 +3679,9 @@ static int chcr_aead_op(struct aead_request *req, { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); - struct uld_ctx *u_ctx; + struct chcr_context *ctx = a_ctx(tfm); + struct uld_ctx *u_ctx = ULD_CTX(ctx); struct sk_buff *skb; - int isfull = 0; struct chcr_dev *cdev; cdev = a_ctx(tfm)->dev; @@ -3621,18 +3697,15 @@ static int chcr_aead_op(struct aead_request *req, return chcr_aead_fallback(req, reqctx->op); } - u_ctx = ULD_CTX(a_ctx(tfm)); if (cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], - a_ctx(tfm)->tx_qidx)) { - isfull = 1; - if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { + reqctx->txqidx) && + (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))) { chcr_dec_wrcount(cdev); return -ENOSPC; - } } /* Form a WR from req */ - skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[a_ctx(tfm)->rx_qidx], size); + skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[reqctx->rxqidx], size); if (IS_ERR_OR_NULL(skb)) { chcr_dec_wrcount(cdev); @@ -3640,15 +3713,22 @@ static int chcr_aead_op(struct aead_request *req, } skb->dev = u_ctx->lldi.ports[0]; - set_wr_txq(skb, CPL_PRIORITY_DATA, a_ctx(tfm)->tx_qidx); + set_wr_txq(skb, CPL_PRIORITY_DATA, reqctx->txqidx); chcr_send_wr(skb); - return isfull ? 
-EBUSY : -EINPROGRESS; + return -EINPROGRESS; } static int chcr_aead_encrypt(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_context *ctx = a_ctx(tfm); + unsigned int cpu; + + cpu = get_cpu(); + reqctx->txqidx = cpu % ctx->ntxq; + reqctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); reqctx->verify = VERIFY_HW; reqctx->op = CHCR_ENCRYPT_OP; @@ -3670,9 +3750,16 @@ static int chcr_aead_encrypt(struct aead_request *req) static int chcr_aead_decrypt(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); + struct chcr_context *ctx = a_ctx(tfm); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); int size; + unsigned int cpu; + + cpu = get_cpu(); + reqctx->txqidx = cpu % ctx->ntxq; + reqctx->rxqidx = cpu % ctx->nrxq; + put_cpu(); if (aeadctx->mayverify == VERIFY_SW) { size = crypto_aead_maxauthsize(tfm); diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h index b41ef1abfe74..e51c138b0a51 100644 --- a/drivers/crypto/chelsio/chcr_core.h +++ b/drivers/crypto/chelsio/chcr_core.h @@ -148,7 +148,6 @@ struct chcr_dev { int wqretry; struct delayed_work detach_work; struct completion detach_comp; - unsigned char tx_channel_id; }; struct uld_ctx { diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h index dbb7e13bb409..542bebae001f 100644 --- a/drivers/crypto/chelsio/chcr_crypto.h +++ b/drivers/crypto/chelsio/chcr_crypto.h @@ -187,6 +187,8 @@ struct chcr_aead_reqctx { unsigned int op; u16 imm; u16 verify; + u16 txqidx; + u16 rxqidx; u8 iv[CHCR_MAX_CRYPTO_IV_LEN + MAX_SCRATCH_PAD_SIZE]; u8 *scratch_pad; }; @@ -250,10 +252,10 @@ struct __crypto_ctx { struct chcr_context { struct chcr_dev *dev; - unsigned char tx_qidx; - unsigned char rx_qidx; - unsigned char tx_chan_id; - unsigned char pci_chan_id; + unsigned char rxq_perchan; + unsigned char txq_perchan; + unsigned int ntxq; + unsigned int nrxq; struct completion cbc_aes_aio_done; struct __crypto_ctx crypto_ctx[0]; }; @@ -280,6 +282,8 @@ struct chcr_ahash_req_ctx { u8 *skbfr; /* SKB which is being sent to the hardware for processing */ u64 data_len; /* Data len till time */ + u16 txqidx; + u16 rxqidx; u8 reqlen; u8 partial_hash[CHCR_HASH_MAX_DIGEST_SIZE]; u8 bfr1[CHCR_HASH_MAX_BLOCK_SIZE_128]; @@ -298,6 +302,8 @@ struct chcr_skcipher_req_ctx { unsigned int op; u16 imm; u8 iv[CHCR_MAX_CRYPTO_IV_LEN]; + u16 txqidx; + u16 rxqidx; }; struct chcr_alg_template { From d282eeeb112a00b05595a034ac4cae8532a4c2fc Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 24 Feb 2020 14:47:41 +0000 Subject: [PATCH 088/162] crypto: arm64/sha-ce - implement export/import When an ahash algorithm falls back to another ahash and that fallback is shaXXX-CE, doing export/import leads to an error like this: alg: ahash: sha1-sun8i-ce export() overran state buffer on test vector 0, cfg="import/export" This is due to the descsize of shaxxx-ce being larger than struct shaxxx_state by one u32. To fix this, let's implement export/import handlers which strip the finalize variable instead of using the generic export/import.
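The state layout behind the mismatch (as used by the export/import hunks below) wraps the generic state plus one trailing word, so descsize exceeds the generic statesize by exactly one u32:

	#include <crypto/sha.h>

	struct sha1_ce_state {
		struct sha1_state	sst;		/* what export/import copy */
		u32			finalize;	/* CE-only; cleared on import */
	};

The new handlers therefore copy only sst and reset finalize, which is exactly what the diff below does.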
Fixes: 6ba6c74dfc6b ("arm64/crypto: SHA-224/SHA-256 using ARMv8 Crypto Extensions") Fixes: 2c98833a42cd ("arm64/crypto: SHA-1 using ARMv8 Crypto Extensions") Signed-off-by: Corentin Labbe Reviewed-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm64/crypto/sha1-ce-glue.c | 20 ++++++++++++++++++++ arch/arm64/crypto/sha2-ce-glue.c | 23 +++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index 63c875d3314b..565ef604ca04 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -91,12 +91,32 @@ static int sha1_ce_final(struct shash_desc *desc, u8 *out) return sha1_base_finish(desc, out); } +static int sha1_ce_export(struct shash_desc *desc, void *out) +{ + struct sha1_ce_state *sctx = shash_desc_ctx(desc); + + memcpy(out, &sctx->sst, sizeof(struct sha1_state)); + return 0; +} + +static int sha1_ce_import(struct shash_desc *desc, const void *in) +{ + struct sha1_ce_state *sctx = shash_desc_ctx(desc); + + memcpy(&sctx->sst, in, sizeof(struct sha1_state)); + sctx->finalize = 0; + return 0; +} + static struct shash_alg alg = { .init = sha1_base_init, .update = sha1_ce_update, .final = sha1_ce_final, .finup = sha1_ce_finup, + .import = sha1_ce_import, + .export = sha1_ce_export, .descsize = sizeof(struct sha1_ce_state), + .statesize = sizeof(struct sha1_state), .digestsize = SHA1_DIGEST_SIZE, .base = { .cra_name = "sha1", diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index a8e67bafba3d..9450d19b9e6e 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -109,12 +109,32 @@ static int sha256_ce_final(struct shash_desc *desc, u8 *out) return sha256_base_finish(desc, out); } +static int sha256_ce_export(struct shash_desc *desc, void *out) +{ + struct sha256_ce_state *sctx = shash_desc_ctx(desc); + + memcpy(out, &sctx->sst, sizeof(struct sha256_state)); + return 0; +} + +static int sha256_ce_import(struct shash_desc *desc, const void *in) +{ + struct sha256_ce_state *sctx = shash_desc_ctx(desc); + + memcpy(&sctx->sst, in, sizeof(struct sha256_state)); + sctx->finalize = 0; + return 0; +} + static struct shash_alg algs[] = { { .init = sha224_base_init, .update = sha256_ce_update, .final = sha256_ce_final, .finup = sha256_ce_finup, + .export = sha256_ce_export, + .import = sha256_ce_import, .descsize = sizeof(struct sha256_ce_state), + .statesize = sizeof(struct sha256_state), .digestsize = SHA224_DIGEST_SIZE, .base = { .cra_name = "sha224", @@ -128,7 +148,10 @@ static struct shash_alg algs[] = { { .update = sha256_ce_update, .final = sha256_ce_final, .finup = sha256_ce_finup, + .export = sha256_ce_export, + .import = sha256_ce_import, .descsize = sizeof(struct sha256_ce_state), + .statesize = sizeof(struct sha256_state), .digestsize = SHA256_DIGEST_SIZE, .base = { .cra_name = "sha256", From 5a8a076506af03f83f83d80412a7c0b06b687ee1 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Mon, 24 Feb 2020 10:21:00 -0600 Subject: [PATCH 089/162] crypto: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 2 +- drivers/crypto/caam/caamalg_qi.c | 4 ++-- drivers/crypto/caam/caamalg_qi2.h | 6 +++--- drivers/crypto/caam/caamhash.c | 2 +- drivers/crypto/cavium/nitrox/nitrox_main.c | 2 +- drivers/crypto/chelsio/chcr_core.h | 2 +- drivers/crypto/mediatek/mtk-sha.c | 2 +- drivers/crypto/nx/nx.h | 2 +- drivers/crypto/omap-sham.c | 4 ++-- include/crypto/if_alg.h | 2 +- 10 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 03797f9b1050..b7bb7c30adeb 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -909,7 +909,7 @@ struct skcipher_edesc { bool bklog; dma_addr_t sec4_sg_dma; struct sec4_sg_entry *sec4_sg; - u32 hw_desc[0]; + u32 hw_desc[]; }; static void caam_unmap(struct device *dev, struct scatterlist *src, diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c index 4a29e0ef9d63..27e36bdf6163 100644 --- a/drivers/crypto/caam/caamalg_qi.c +++ b/drivers/crypto/caam/caamalg_qi.c @@ -783,7 +783,7 @@ struct aead_edesc { unsigned int assoclen; dma_addr_t assoclen_dma; struct caam_drv_req drv_req; - struct qm_sg_entry sgt[0]; + struct qm_sg_entry sgt[]; }; /* @@ -803,7 +803,7 @@ struct skcipher_edesc { int qm_sg_bytes; dma_addr_t qm_sg_dma; struct caam_drv_req drv_req; - struct qm_sg_entry sgt[0]; + struct qm_sg_entry sgt[]; }; static struct caam_drv_ctx *get_drv_ctx(struct caam_ctx *ctx, diff --git a/drivers/crypto/caam/caamalg_qi2.h b/drivers/crypto/caam/caamalg_qi2.h index 706736776b47..f29cb7bd7dd3 100644 --- a/drivers/crypto/caam/caamalg_qi2.h +++ b/drivers/crypto/caam/caamalg_qi2.h @@ -114,7 +114,7 @@ struct aead_edesc { dma_addr_t qm_sg_dma; unsigned int assoclen; dma_addr_t assoclen_dma; - struct dpaa2_sg_entry sgt[0]; + struct dpaa2_sg_entry sgt[]; }; /* @@ -132,7 +132,7 @@ struct skcipher_edesc { dma_addr_t iv_dma; int qm_sg_bytes; dma_addr_t qm_sg_dma; - struct dpaa2_sg_entry sgt[0]; + struct dpaa2_sg_entry sgt[]; }; /* @@ -146,7 +146,7 @@ struct ahash_edesc { dma_addr_t qm_sg_dma; int src_nents; int qm_sg_bytes; - struct dpaa2_sg_entry sgt[0]; + struct dpaa2_sg_entry sgt[]; }; /** diff --git 
a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 2fe852853d40..943bc0296267 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -536,7 +536,7 @@ struct ahash_edesc { int sec4_sg_bytes; bool bklog; u32 hw_desc[DESC_JOB_IO_LEN_MAX / sizeof(u32)] ____cacheline_aligned; - struct sec4_sg_entry sec4_sg[0]; + struct sec4_sg_entry sec4_sg[]; }; static inline void ahash_unmap(struct device *dev, diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c index c4632d84c9a1..e91be9b8b083 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_main.c +++ b/drivers/crypto/cavium/nitrox/nitrox_main.c @@ -71,7 +71,7 @@ struct ucode { char version[VERSION_LEN - 1]; __be32 code_size; u8 raz[12]; - u64 code[0]; + u64 code[]; }; /** diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h index e51c138b0a51..a4b9337cbf2c 100644 --- a/drivers/crypto/chelsio/chcr_core.h +++ b/drivers/crypto/chelsio/chcr_core.h @@ -68,7 +68,7 @@ struct _key_ctx { __be32 ctx_hdr; u8 salt[MAX_SALT]; __be64 iv_to_auth; - unsigned char key[0]; + unsigned char key[]; }; #define KEYCTX_TX_WR_IV_S 55 diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c index 9e9f48bb7f85..bd6309e57ab8 100644 --- a/drivers/crypto/mediatek/mtk-sha.c +++ b/drivers/crypto/mediatek/mtk-sha.c @@ -107,7 +107,7 @@ struct mtk_sha_ctx { u8 id; u8 buf[SHA_BUF_SIZE] __aligned(sizeof(u32)); - struct mtk_sha_hmac_ctx base[0]; + struct mtk_sha_hmac_ctx base[]; }; struct mtk_sha_drv { diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h index 91c54289124a..c6233173c612 100644 --- a/drivers/crypto/nx/nx.h +++ b/drivers/crypto/nx/nx.h @@ -37,7 +37,7 @@ struct max_sync_cop { u32 fc; u32 mode; u32 triplets; - struct msc_triplet trip[0]; + struct msc_triplet trip[]; } __packed; struct alg_props { diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 4f915a4ef5b0..e4072cd38585 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -159,7 +159,7 @@ struct omap_sham_reqctx { int sg_len; unsigned int total; /* total request */ - u8 buffer[0] OMAP_ALIGNED; + u8 buffer[] OMAP_ALIGNED; }; struct omap_sham_hmac_ctx { @@ -176,7 +176,7 @@ struct omap_sham_ctx { /* fallback stuff */ struct crypto_shash *fallback; - struct omap_sham_hmac_ctx base[0]; + struct omap_sham_hmac_ctx base[]; }; #define OMAP_SHAM_QUEUE_LENGTH 10 diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index 24cfa96f98ea..56527c85d122 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -66,7 +66,7 @@ struct af_alg_sgl { struct af_alg_tsgl { struct list_head list; unsigned int cur; /* Last processed SG entry */ - struct scatterlist sg[0]; /* Array of SGs forming the SGL */ + struct scatterlist sg[]; /* Array of SGs forming the SGL */ }; #define MAX_SGL_ENTS ((4096 - sizeof(struct af_alg_tsgl)) / \ From 11c7686a9359ffc8f72f09d26895341984791d44 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 24 Feb 2020 20:26:02 +0000 Subject: [PATCH 090/162] crypto: sun8i-ss - fix description of stat_fb The description of stat_fb was wrong, let's fix it. 
Fixes: f08fcced6d00 ("crypto: allwinner - Add sun8i-ss cryptographic offloader") Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h index b5f855f3de10..29c44f279112 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h @@ -186,7 +186,7 @@ struct sun8i_cipher_tfm_ctx { * this template * @alg: one of sub struct must be used * @stat_req: number of request done on this template - * @stat_fb: total of all data len done on this template + * @stat_fb: number of requests which have fallen back */ struct sun8i_ss_alg_template { u32 type; From 5fbab10dee12ce215f615a08e07b51e91f25f2ab Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 24 Feb 2020 20:26:03 +0000 Subject: [PATCH 091/162] crypto: sun8i-ce - fix description of stat_fb The description of stat_fb was wrong, let's fix it. Fixes: 06f751b61329 ("crypto: allwinner - Add sun8i-ce Crypto Engine") Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h index 8f8404c84a4d..0e9eac397e1b 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h @@ -214,7 +214,7 @@ struct sun8i_cipher_tfm_ctx { * this template * @alg: one of sub struct must be used * @stat_req: number of request done on this template - * @stat_fb: total of all data len done on this template + * @stat_fb: number of requests which have fallen back */ struct sun8i_ce_alg_template { u32 type; From fa03481b6e2e82355c46644147b614f18c7a8161 Mon Sep 17 00:00:00 2001 From: Rosioru Dragos Date: Tue, 25 Feb 2020 17:05:52 +0200 Subject: [PATCH 092/162] crypto: mxs-dcp - fix scatterlist linearization for hash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The incorrect traversal of the scatterlist during the linearization phase led to computing the hash value of the wrong input buffer. The new implementation uses scatterwalk_map_and_copy() to address this issue.
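The rewritten copy loop reduces to the following shape (a minimal sketch with a hypothetical helper name, not the exact driver code):

#include <linux/kernel.h>
#include <crypto/scatterwalk.h>

/*
 * Sketch: linearize nbytes from src into buf chunk by chunk, addressed by
 * a running byte offset. scatterwalk_map_and_copy() maps each scatterlist
 * entry as needed, instead of trusting sg_virt()/sg_dma_len() per entry
 * as the old loop did.
 */
static void sketch_linearize(struct scatterlist *src, unsigned int nbytes,
			     u8 *buf, unsigned int bufsz)
{
	unsigned int oft = 0, clen;

	while (nbytes) {
		clen = min(nbytes, bufsz);
		scatterwalk_map_and_copy(buf, src, oft, clen, 0);
		oft += clen;
		nbytes -= clen;
		/* a real driver submits buf to the engine when it fills up */
	}
}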
Cc: Fixes: 15b59e7c3733 ("crypto: mxs - Add Freescale MXS DCP driver") Signed-off-by: Rosioru Dragos Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/mxs-dcp.c | 54 +++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 435ac1c83df9..d84530293036 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -20,6 +20,7 @@ #include #include #include +#include #define DCP_MAX_CHANS 4 #define DCP_BUF_SZ PAGE_SIZE @@ -611,49 +612,46 @@ static int dcp_sha_req_to_buf(struct crypto_async_request *arq) struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm); struct dcp_sha_req_ctx *rctx = ahash_request_ctx(req); struct hash_alg_common *halg = crypto_hash_alg_common(tfm); - const int nents = sg_nents(req->src); uint8_t *in_buf = sdcp->coh->sha_in_buf; uint8_t *out_buf = sdcp->coh->sha_out_buf; - uint8_t *src_buf; - struct scatterlist *src; - unsigned int i, len, clen; + unsigned int i, len, clen, oft = 0; int ret; int fin = rctx->fini; if (fin) rctx->fini = 0; - for_each_sg(req->src, src, nents, i) { - src_buf = sg_virt(src); - len = sg_dma_len(src); + src = req->src; + len = req->nbytes; - do { - if (actx->fill + len > DCP_BUF_SZ) - clen = DCP_BUF_SZ - actx->fill; - else - clen = len; + while (len) { + if (actx->fill + len > DCP_BUF_SZ) + clen = DCP_BUF_SZ - actx->fill; + else + clen = len; - memcpy(in_buf + actx->fill, src_buf, clen); - len -= clen; - src_buf += clen; - actx->fill += clen; + scatterwalk_map_and_copy(in_buf + actx->fill, src, oft, clen, + 0); - /* - * If we filled the buffer and still have some - * more data, submit the buffer. - */ - if (len && actx->fill == DCP_BUF_SZ) { - ret = mxs_dcp_run_sha(req); - if (ret) - return ret; - actx->fill = 0; - rctx->init = 0; - } - } while (len); + len -= clen; + oft += clen; + actx->fill += clen; + + /* + * If we filled the buffer and still have some + * more data, submit the buffer. + */ + if (len && actx->fill == DCP_BUF_SZ) { + ret = mxs_dcp_run_sha(req); + if (ret) + return ret; + actx->fill = 0; + rctx->init = 0; + } } if (fin) { From 4e98a88049235925568f6ad67059e6af7fea987f Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Tue, 25 Feb 2020 22:21:20 +0530 Subject: [PATCH 093/162] crypto: chelsio/chtls - Fixed tls stats Added tls rx stats and reset tls rx/tx stats when chtls driver unload. 
Signed-off-by: Vinay Kumar Yadav Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chtls/chtls_io.c | 3 +++ drivers/crypto/chelsio/chtls/chtls_main.c | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c index 5cf9b021220b..781fe7c55a27 100644 --- a/drivers/crypto/chelsio/chtls/chtls_io.c +++ b/drivers/crypto/chelsio/chtls/chtls_io.c @@ -1399,6 +1399,8 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); struct chtls_hws *hws = &csk->tlshws; + struct net_device *dev = csk->egress_dev; + struct adapter *adap = netdev2adap(dev); struct tcp_sock *tp = tcp_sk(sk); unsigned long avail; int buffers_freed; @@ -1540,6 +1542,7 @@ skip_copy: tp->copied_seq += skb->len; hws->rcvpld = skb->hdr_len; } else { + atomic_inc(&adap->chcr_stats.tls_pdu_rx); tp->copied_seq += hws->rcvpld; } chtls_free_skb(sk, skb); diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c index 9f06abd340cc..2110d0893bc7 100644 --- a/drivers/crypto/chelsio/chtls/chtls_main.c +++ b/drivers/crypto/chelsio/chtls/chtls_main.c @@ -174,9 +174,16 @@ static inline void chtls_dev_release(struct kref *kref) { struct tls_toe_device *dev; struct chtls_dev *cdev; + struct adapter *adap; dev = container_of(kref, struct tls_toe_device, kref); cdev = to_chtls_dev(dev); + + /* Reset tls rx/tx stats */ + adap = pci_get_drvdata(cdev->pdev); + atomic_set(&adap->chcr_stats.tls_pdu_tx, 0); + atomic_set(&adap->chcr_stats.tls_pdu_rx, 0); + chtls_free_uld(cdev); } From baaa30fa6166279e34fea8a1c9f08a7bdcdda2e5 Mon Sep 17 00:00:00 2001 From: Zhangfei Gao Date: Wed, 26 Feb 2020 09:28:28 +0800 Subject: [PATCH 094/162] MAINTAINERS: add maintainers for uacce Add Zhangfei Gao and Zhou Wang as maintainers for uacce Signed-off-by: Zhangfei Gao Signed-off-by: Zhou Wang Signed-off-by: Herbert Xu --- MAINTAINERS | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 12765a0b08e2..ce3597e7e6d0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17041,6 +17041,18 @@ W: http://linuxtv.org S: Maintained F: drivers/media/pci/tw686x/ +UACCE ACCELERATOR FRAMEWORK +M: Zhangfei Gao +M: Zhou Wang +L: linux-accelerators@lists.ozlabs.org +L: linux-kernel@vger.kernel.org +S: Maintained +F: Documentation/ABI/testing/sysfs-driver-uacce +F: Documentation/misc-devices/uacce.rst +F: drivers/misc/uacce/ +F: include/linux/uacce.h +F: include/uapi/misc/uacce/ + UBI FILE SYSTEM (UBIFS) M: Richard Weinberger L: linux-mtd@lists.infradead.org From d1dc4df1fe21df4f83b8aa7997310480df72eddc Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 25 Feb 2020 20:59:13 -0800 Subject: [PATCH 095/162] crypto: authencesn - fix weird comma-terminated line Fix a weird case where a line was terminated by a comma rather than a semicolon, causing the statement to be continued on the next line. Fortunately the code still behaved as intended, though. 
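In plain C terms (a standalone illustration, not code from this patch), the comma operator is what kept the behaviour intact:

#include <stdio.h>

int main(void)
{
	int a, b;

	a = 1,		/* comma operator: left operand evaluated first */
	b = 2;		/* then the right operand, as if two statements */

	printf("%d %d\n", a, b);	/* prints: 1 2 */
	return 0;
}

Both halves are plain assignments with nothing in between, so sequencing them with ',' instead of ';' is behaviourally identical; the bug was purely cosmetic.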
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/authencesn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/authencesn.c b/crypto/authencesn.c index 589008146fce..149b70df2a91 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -458,7 +458,7 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl, inst->alg.encrypt = crypto_authenc_esn_encrypt; inst->alg.decrypt = crypto_authenc_esn_decrypt; - inst->free = crypto_authenc_esn_free, + inst->free = crypto_authenc_esn_free; err = aead_register_instance(tmpl, inst); if (err) { From 64d66793b76cd3db39137cf9fc7ebe9c2010931d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 25 Feb 2020 20:59:14 -0800 Subject: [PATCH 096/162] crypto: ccm - simplify error handling in crypto_rfc4309_create() Simplify the error handling in crypto_rfc4309_create() by taking advantage of crypto_grab_aead() now handling an ERR_PTR() name and by taking advantage of crypto_drop_aead() now accepting (as a no-op) a spawn that hasn't been grabbed yet. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/ccm.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/crypto/ccm.c b/crypto/ccm.c index 241ecdc5c4e0..d1fb01bbc814 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -717,7 +717,6 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl, struct aead_instance *inst; struct crypto_aead_spawn *spawn; struct aead_alg *alg; - const char *ccm_name; int err; algt = crypto_get_attr_type(tb); @@ -729,19 +728,15 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl, mask = crypto_requires_sync(algt->type, algt->mask); - ccm_name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(ccm_name)) - return PTR_ERR(ccm_name); - inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return -ENOMEM; spawn = aead_instance_ctx(inst); err = crypto_grab_aead(spawn, aead_crypto_instance(inst), - ccm_name, 0, mask); + crypto_attr_alg_name(tb[1]), 0, mask); if (err) - goto out_free_inst; + goto err_free_inst; alg = crypto_spawn_aead_alg(spawn); @@ -749,11 +744,11 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl, /* We only support 16-byte blocks. */ if (crypto_aead_alg_ivsize(alg) != 16) - goto out_drop_alg; + goto err_free_inst; /* Not a stream cipher? 
*/ if (alg->base.cra_blocksize != 1) - goto out_drop_alg; + goto err_free_inst; err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, @@ -762,7 +757,7 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl, snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "rfc4309(%s)", alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) - goto out_drop_alg; + goto err_free_inst; inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = alg->base.cra_priority; @@ -786,17 +781,11 @@ static int crypto_rfc4309_create(struct crypto_template *tmpl, inst->free = crypto_rfc4309_free; err = aead_register_instance(tmpl, inst); - if (err) - goto out_drop_alg; - -out: + if (err) { +err_free_inst: + crypto_rfc4309_free(inst); + } return err; - -out_drop_alg: - crypto_drop_aead(spawn); -out_free_inst: - kfree(inst); - goto out; } static int crypto_cbcmac_digest_setkey(struct crypto_shash *parent, From b8c0d74a70273cea5724073370fb8ffec9783d20 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 25 Feb 2020 20:59:15 -0800 Subject: [PATCH 097/162] crypto: cryptd - simplify error handling in cryptd_create_*() Simplify the error handling in the various cryptd_create_*() functions by taking advantage of crypto_grab_*() now handling an ERR_PTR() name and by taking advantage of crypto_drop_*() now accepting (as a no-op) a spawn that hasn't been grabbed yet. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/cryptd.c | 37 +++++++++++-------------------------- 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/crypto/cryptd.c b/crypto/cryptd.c index d94c75c840a5..283212262adb 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -369,7 +369,6 @@ static int cryptd_create_skcipher(struct crypto_template *tmpl, struct skcipherd_instance_ctx *ctx; struct skcipher_instance *inst; struct skcipher_alg *alg; - const char *name; u32 type; u32 mask; int err; @@ -379,10 +378,6 @@ static int cryptd_create_skcipher(struct crypto_template *tmpl, cryptd_check_internal(tb, &type, &mask); - name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(name)) - return PTR_ERR(name); - inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) return -ENOMEM; @@ -391,14 +386,14 @@ static int cryptd_create_skcipher(struct crypto_template *tmpl, ctx->queue = queue; err = crypto_grab_skcipher(&ctx->spawn, skcipher_crypto_instance(inst), - name, type, mask); + crypto_attr_alg_name(tb[1]), type, mask); if (err) - goto out_free_inst; + goto err_free_inst; alg = crypto_spawn_skcipher_alg(&ctx->spawn); err = cryptd_init_instance(skcipher_crypto_instance(inst), &alg->base); if (err) - goto out_drop_skcipher; + goto err_free_inst; inst->alg.base.cra_flags = CRYPTO_ALG_ASYNC | (alg->base.cra_flags & CRYPTO_ALG_INTERNAL); @@ -421,10 +416,8 @@ static int cryptd_create_skcipher(struct crypto_template *tmpl, err = skcipher_register_instance(tmpl, inst); if (err) { -out_drop_skcipher: - crypto_drop_skcipher(&ctx->spawn); -out_free_inst: - kfree(inst); +err_free_inst: + cryptd_skcipher_free(inst); } return err; } @@ -694,8 +687,7 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, err = ahash_register_instance(tmpl, inst); if (err) { err_free_inst: - crypto_drop_shash(&ctx->spawn); - kfree(inst); + cryptd_hash_free(inst); } return err; } @@ -833,17 +825,12 @@ static int cryptd_create_aead(struct crypto_template *tmpl, struct aead_instance_ctx *ctx; struct aead_instance *inst; struct aead_alg *alg; - const char *name; u32 
type = 0; u32 mask = CRYPTO_ALG_ASYNC; int err; cryptd_check_internal(tb, &type, &mask); - name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(name)) - return PTR_ERR(name); - inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) return -ENOMEM; @@ -852,14 +839,14 @@ static int cryptd_create_aead(struct crypto_template *tmpl, ctx->queue = queue; err = crypto_grab_aead(&ctx->aead_spawn, aead_crypto_instance(inst), - name, type, mask); + crypto_attr_alg_name(tb[1]), type, mask); if (err) - goto out_free_inst; + goto err_free_inst; alg = crypto_spawn_aead_alg(&ctx->aead_spawn); err = cryptd_init_instance(aead_crypto_instance(inst), &alg->base); if (err) - goto out_drop_aead; + goto err_free_inst; inst->alg.base.cra_flags = CRYPTO_ALG_ASYNC | (alg->base.cra_flags & CRYPTO_ALG_INTERNAL); @@ -879,10 +866,8 @@ static int cryptd_create_aead(struct crypto_template *tmpl, err = aead_register_instance(tmpl, inst); if (err) { -out_drop_aead: - crypto_drop_aead(&ctx->aead_spawn); -out_free_inst: - kfree(inst); +err_free_inst: + cryptd_aead_free(inst); } return err; } From a108dfcff8441193565bb78c4a15bf344ce9b17e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 25 Feb 2020 20:59:16 -0800 Subject: [PATCH 098/162] crypto: ctr - simplify error handling in crypto_rfc3686_create() Simplify the error handling in crypto_rfc3686_create() by taking advantage of crypto_grab_skcipher() now handling an ERR_PTR() name and by taking advantage of crypto_drop_skcipher() now accepting (as a no-op) a spawn that hasn't been grabbed yet. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/ctr.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/crypto/ctr.c b/crypto/ctr.c index a8feab621c6c..31ac4ae598e1 100644 --- a/crypto/ctr.c +++ b/crypto/ctr.c @@ -260,7 +260,6 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl, struct skcipher_instance *inst; struct skcipher_alg *alg; struct crypto_skcipher_spawn *spawn; - const char *cipher_name; u32 mask; int err; @@ -272,10 +271,6 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl, if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask) return -EINVAL; - cipher_name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(cipher_name)) - return PTR_ERR(cipher_name); - inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return -ENOMEM; @@ -287,7 +282,7 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl, spawn = skcipher_instance_ctx(inst); err = crypto_grab_skcipher(spawn, skcipher_crypto_instance(inst), - cipher_name, 0, mask); + crypto_attr_alg_name(tb[1]), 0, mask); if (err) goto err_free_inst; @@ -296,20 +291,20 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl, /* We only support 16-byte blocks. */ err = -EINVAL; if (crypto_skcipher_alg_ivsize(alg) != CTR_RFC3686_BLOCK_SIZE) - goto err_drop_spawn; + goto err_free_inst; /* Not a stream cipher? 
*/ if (alg->base.cra_blocksize != 1) - goto err_drop_spawn; + goto err_free_inst; err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "rfc3686(%s)", alg->base.cra_name) >= CRYPTO_MAX_ALG_NAME) - goto err_drop_spawn; + goto err_free_inst; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "rfc3686(%s)", alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) - goto err_drop_spawn; + goto err_free_inst; inst->alg.base.cra_priority = alg->base.cra_priority; inst->alg.base.cra_blocksize = 1; @@ -336,17 +331,11 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl, inst->free = crypto_rfc3686_free; err = skcipher_register_instance(tmpl, inst); - if (err) - goto err_drop_spawn; - -out: - return err; - -err_drop_spawn: - crypto_drop_skcipher(spawn); + if (err) { err_free_inst: - kfree(inst); - goto out; + crypto_rfc3686_free(inst); + } + return err; } static struct crypto_template crypto_ctr_tmpls[] = { From 3ff2bab82f42e05f39c77566f23ccbe5693f7982 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 25 Feb 2020 20:59:17 -0800 Subject: [PATCH 099/162] crypto: cts - simplify error handling in crypto_cts_create() Simplify the error handling in crypto_cts_create() by taking advantage of crypto_grab_skcipher() now handling an ERR_PTR() name and by taking advantage of crypto_drop_skcipher() now accepting (as a no-op) a spawn that hasn't been grabbed yet. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/cts.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/crypto/cts.c b/crypto/cts.c index 48188adc8e91..5e005c4f0221 100644 --- a/crypto/cts.c +++ b/crypto/cts.c @@ -327,7 +327,6 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb) struct skcipher_instance *inst; struct crypto_attr_type *algt; struct skcipher_alg *alg; - const char *cipher_name; u32 mask; int err; @@ -340,10 +339,6 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb) mask = crypto_requires_sync(algt->type, algt->mask); - cipher_name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(cipher_name)) - return PTR_ERR(cipher_name); - inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return -ENOMEM; @@ -351,7 +346,7 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb) spawn = skcipher_instance_ctx(inst); err = crypto_grab_skcipher(spawn, skcipher_crypto_instance(inst), - cipher_name, 0, mask); + crypto_attr_alg_name(tb[1]), 0, mask); if (err) goto err_free_inst; @@ -359,15 +354,15 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb) err = -EINVAL; if (crypto_skcipher_alg_ivsize(alg) != alg->base.cra_blocksize) - goto err_drop_spawn; + goto err_free_inst; if (strncmp(alg->base.cra_name, "cbc(", 4)) - goto err_drop_spawn; + goto err_free_inst; err = crypto_inst_setname(skcipher_crypto_instance(inst), "cts", &alg->base); if (err) - goto err_drop_spawn; + goto err_free_inst; inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = alg->base.cra_priority; @@ -391,17 +386,11 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb) inst->free = crypto_cts_free; err = skcipher_register_instance(tmpl, inst); - if (err) - goto err_drop_spawn; - -out: - return err; - -err_drop_spawn: - crypto_drop_skcipher(spawn); + if (err) { err_free_inst: - kfree(inst); - goto out; + crypto_cts_free(inst); + } + return err; } static struct crypto_template 
crypto_cts_tmpl = { From 959ac1cdd630b48fad3d8f47c96a01bfc3d71377 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 25 Feb 2020 20:59:18 -0800 Subject: [PATCH 100/162] crypto: gcm - simplify error handling in crypto_rfc4106_create() Simplify the error handling in crypto_rfc4106_create() by taking advantage of crypto_grab_aead() now handling an ERR_PTR() name and by taking advantage of crypto_drop_aead() now accepting (as a no-op) a spawn that hasn't been grabbed yet. Conveniently, this eliminates the 'ccm_name' variable which was incorrectly named (it should have been 'gcm_name'). Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/gcm.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/crypto/gcm.c b/crypto/gcm.c index 8e5c0ac65661..5560341e2810 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -840,7 +840,6 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl, struct aead_instance *inst; struct crypto_aead_spawn *spawn; struct aead_alg *alg; - const char *ccm_name; int err; algt = crypto_get_attr_type(tb); @@ -852,19 +851,15 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl, mask = crypto_requires_sync(algt->type, algt->mask); - ccm_name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(ccm_name)) - return PTR_ERR(ccm_name); - inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return -ENOMEM; spawn = aead_instance_ctx(inst); err = crypto_grab_aead(spawn, aead_crypto_instance(inst), - ccm_name, 0, mask); + crypto_attr_alg_name(tb[1]), 0, mask); if (err) - goto out_free_inst; + goto err_free_inst; alg = crypto_spawn_aead_alg(spawn); @@ -872,11 +867,11 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl, /* Underlying IV size must be 12. */ if (crypto_aead_alg_ivsize(alg) != GCM_AES_IV_SIZE) - goto out_drop_alg; + goto err_free_inst; /* Not a stream cipher? */ if (alg->base.cra_blocksize != 1) - goto out_drop_alg; + goto err_free_inst; err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, @@ -885,7 +880,7 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl, snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "rfc4106(%s)", alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) - goto out_drop_alg; + goto err_free_inst; inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = alg->base.cra_priority; @@ -909,17 +904,11 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl, inst->free = crypto_rfc4106_free; err = aead_register_instance(tmpl, inst); - if (err) - goto out_drop_alg; - -out: + if (err) { +err_free_inst: + crypto_rfc4106_free(inst); + } return err; - -out_drop_alg: - crypto_drop_aead(spawn); -out_free_inst: - kfree(inst); - goto out; } static int crypto_rfc4543_setkey(struct crypto_aead *parent, const u8 *key, From c4caa56d8f995aa34118757c6525cf6048c68def Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 25 Feb 2020 20:59:19 -0800 Subject: [PATCH 101/162] crypto: gcm - simplify error handling in crypto_rfc4543_create() Simplify the error handling in crypto_rfc4543_create() by taking advantage of crypto_grab_aead() now handling an ERR_PTR() name and by taking advantage of crypto_drop_aead() now accepting (as a no-op) a spawn that hasn't been grabbed yet. Conveniently, this eliminates the 'ccm_name' variable which was incorrectly named (it should have been 'gcm_name'). 
Also fix a weird case where a line was terminated by a comma rather than a semicolon, causing the statement to be continued on the next line. Fortunately the code still behaved as intended, though. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/gcm.c | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/crypto/gcm.c b/crypto/gcm.c index 5560341e2810..0103d28c541e 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -1060,10 +1060,8 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl, struct crypto_attr_type *algt; u32 mask; struct aead_instance *inst; - struct crypto_aead_spawn *spawn; struct aead_alg *alg; struct crypto_rfc4543_instance_ctx *ctx; - const char *ccm_name; int err; algt = crypto_get_attr_type(tb); @@ -1075,32 +1073,27 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl, mask = crypto_requires_sync(algt->type, algt->mask); - ccm_name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(ccm_name)) - return PTR_ERR(ccm_name); - inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) return -ENOMEM; ctx = aead_instance_ctx(inst); - spawn = &ctx->aead; - err = crypto_grab_aead(spawn, aead_crypto_instance(inst), - ccm_name, 0, mask); + err = crypto_grab_aead(&ctx->aead, aead_crypto_instance(inst), + crypto_attr_alg_name(tb[1]), 0, mask); if (err) - goto out_free_inst; + goto err_free_inst; - alg = crypto_spawn_aead_alg(spawn); + alg = crypto_spawn_aead_alg(&ctx->aead); err = -EINVAL; /* Underlying IV size must be 12. */ if (crypto_aead_alg_ivsize(alg) != GCM_AES_IV_SIZE) - goto out_drop_alg; + goto err_free_inst; /* Not a stream cipher? */ if (alg->base.cra_blocksize != 1) - goto out_drop_alg; + goto err_free_inst; err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, @@ -1109,7 +1102,7 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl, snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "rfc4543(%s)", alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) - goto out_drop_alg; + goto err_free_inst; inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = alg->base.cra_priority; @@ -1130,20 +1123,14 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl, inst->alg.encrypt = crypto_rfc4543_encrypt; inst->alg.decrypt = crypto_rfc4543_decrypt; - inst->free = crypto_rfc4543_free, + inst->free = crypto_rfc4543_free; err = aead_register_instance(tmpl, inst); - if (err) - goto out_drop_alg; - -out: + if (err) { +err_free_inst: + crypto_rfc4543_free(inst); + } return err; - -out_drop_alg: - crypto_drop_aead(spawn); -out_free_inst: - kfree(inst); - goto out; } static struct crypto_template crypto_gcm_tmpls[] = { From 376ffe1acbc642ce38169f7410b1735ffcacd774 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 25 Feb 2020 20:59:20 -0800 Subject: [PATCH 102/162] crypto: geniv - simplify error handling in aead_geniv_alloc() Simplify the error handling in aead_geniv_alloc() by taking advantage of crypto_grab_aead() now handling an ERR_PTR() name and by taking advantage of crypto_drop_aead() now accepting (as a no-op) a spawn that hasn't been grabbed yet.
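The conversion repeated across these patches reduces to the following schematic shape (all names here are placeholders, not a real template):

#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
#include <linux/slab.h>

/*
 * Sketch: because crypto_grab_skcipher() accepts an ERR_PTR() straight
 * from crypto_attr_alg_name(), and crypto_drop_skcipher() is a no-op on
 * a spawn that was never grabbed, one routine can unwind every failure.
 */
static void example_free(struct skcipher_instance *inst)
{
	crypto_drop_skcipher(skcipher_instance_ctx(inst));
	kfree(inst);
}

static int example_create(struct crypto_template *tmpl, struct rtattr **tb)
{
	struct skcipher_instance *inst;
	int err;

	inst = kzalloc(sizeof(*inst) + sizeof(struct crypto_skcipher_spawn),
		       GFP_KERNEL);
	if (!inst)
		return -ENOMEM;

	/* no IS_ERR() check on the name: the grab helper handles it */
	err = crypto_grab_skcipher(skcipher_instance_ctx(inst),
				   skcipher_crypto_instance(inst),
				   crypto_attr_alg_name(tb[1]), 0, 0);
	if (err)
		goto err_free_inst;

	/* ... validate the algorithm and fill in inst->alg ... */

	err = skcipher_register_instance(tmpl, inst);
	if (err) {
err_free_inst:
		example_free(inst);
	}
	return err;
}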
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/geniv.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/crypto/geniv.c b/crypto/geniv.c index dbcc640274cd..6a90c52d49ad 100644 --- a/crypto/geniv.c +++ b/crypto/geniv.c @@ -41,7 +41,6 @@ static void aead_geniv_free(struct aead_instance *inst) struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl, struct rtattr **tb, u32 type, u32 mask) { - const char *name; struct crypto_aead_spawn *spawn; struct crypto_attr_type *algt; struct aead_instance *inst; @@ -57,10 +56,6 @@ struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl, if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask) return ERR_PTR(-EINVAL); - name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(name)) - return ERR_CAST(name); - inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return ERR_PTR(-ENOMEM); @@ -71,7 +66,7 @@ struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl, mask |= crypto_requires_sync(algt->type, algt->mask); err = crypto_grab_aead(spawn, aead_crypto_instance(inst), - name, type, mask); + crypto_attr_alg_name(tb[1]), type, mask); if (err) goto err_free_inst; @@ -82,17 +77,17 @@ struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl, err = -EINVAL; if (ivsize < sizeof(u64)) - goto err_drop_alg; + goto err_free_inst; err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", tmpl->name, alg->base.cra_name) >= CRYPTO_MAX_ALG_NAME) - goto err_drop_alg; + goto err_free_inst; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", tmpl->name, alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) - goto err_drop_alg; + goto err_free_inst; inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = alg->base.cra_priority; @@ -111,10 +106,8 @@ struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl, out: return inst; -err_drop_alg: - crypto_drop_aead(spawn); err_free_inst: - kfree(inst); + aead_geniv_free(inst); inst = ERR_PTR(err); goto out; } From d57063103332b95eac9c118900f35700a491da08 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 25 Feb 2020 20:59:21 -0800 Subject: [PATCH 103/162] crypto: lrw - simplify error handling in create() Simplify the error handling in the LRW template's ->create() function by taking advantage of crypto_drop_skcipher() now accepting (as a no-op) a spawn that hasn't been grabbed yet. 
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/lrw.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/crypto/lrw.c b/crypto/lrw.c index 63c485c0d8a6..376d7ed3f1f8 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -343,15 +343,15 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) err = -EINVAL; if (alg->base.cra_blocksize != LRW_BLOCK_SIZE) - goto err_drop_spawn; + goto err_free_inst; if (crypto_skcipher_alg_ivsize(alg)) - goto err_drop_spawn; + goto err_free_inst; err = crypto_inst_setname(skcipher_crypto_instance(inst), "lrw", &alg->base); if (err) - goto err_drop_spawn; + goto err_free_inst; err = -EINVAL; cipher_name = alg->base.cra_name; @@ -364,20 +364,20 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) len = strlcpy(ecb_name, cipher_name + 4, sizeof(ecb_name)); if (len < 2 || len >= sizeof(ecb_name)) - goto err_drop_spawn; + goto err_free_inst; if (ecb_name[len - 1] != ')') - goto err_drop_spawn; + goto err_free_inst; ecb_name[len - 1] = 0; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "lrw(%s)", ecb_name) >= CRYPTO_MAX_ALG_NAME) { err = -ENAMETOOLONG; - goto err_drop_spawn; + goto err_free_inst; } } else - goto err_drop_spawn; + goto err_free_inst; inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = alg->base.cra_priority; @@ -403,17 +403,11 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) inst->free = free; err = skcipher_register_instance(tmpl, inst); - if (err) - goto err_drop_spawn; - -out: - return err; - -err_drop_spawn: - crypto_drop_skcipher(spawn); + if (err) { err_free_inst: - kfree(inst); - goto out; + free(inst); + } + return err; } static struct crypto_template crypto_tmpl = { From 07b24c7c08bdc2d36de10881a17145426f47742b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 25 Feb 2020 20:59:22 -0800 Subject: [PATCH 104/162] crypto: pcrypt - simplify error handling in pcrypt_create_aead() Simplify the error handling in pcrypt_create_aead() by taking advantage of crypto_grab_aead() now handling an ERR_PTR() name and by taking advantage of crypto_drop_aead() now accepting (as a no-op) a spawn that hasn't been grabbed yet. This required also making padata_free_shell() accept a NULL argument. 
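In sketch form, this is what the unified unwind relies on (field names as in pcrypt's instance context; the body is condensed, not the full function):

/*
 * Every sub-destructor tolerates its "never set up" state, so this one
 * routine is safe at any point after the kzalloc() of inst.
 */
static void pcrypt_free_sketch(struct aead_instance *inst)
{
	struct pcrypt_instance_ctx *ctx = aead_instance_ctx(inst);

	crypto_drop_aead(&ctx->spawn);	/* no-op if never grabbed */
	padata_free_shell(ctx->psdec);	/* no-op if still NULL */
	padata_free_shell(ctx->psenc);	/* no-op if still NULL */
	kfree(inst);
}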
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/pcrypt.c | 33 +++++++++------------------------ kernel/padata.c | 7 ++++--- 2 files changed, 13 insertions(+), 27 deletions(-) diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index 1b632139a8c1..8bddc65cd509 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -232,17 +232,12 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb, struct crypto_attr_type *algt; struct aead_instance *inst; struct aead_alg *alg; - const char *name; int err; algt = crypto_get_attr_type(tb); if (IS_ERR(algt)) return PTR_ERR(algt); - name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(name)) - return PTR_ERR(name); - inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) return -ENOMEM; @@ -252,21 +247,21 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb, ctx = aead_instance_ctx(inst); ctx->psenc = padata_alloc_shell(pencrypt); if (!ctx->psenc) - goto out_free_inst; + goto err_free_inst; ctx->psdec = padata_alloc_shell(pdecrypt); if (!ctx->psdec) - goto out_free_psenc; + goto err_free_inst; err = crypto_grab_aead(&ctx->spawn, aead_crypto_instance(inst), - name, 0, 0); + crypto_attr_alg_name(tb[1]), 0, 0); if (err) - goto out_free_psdec; + goto err_free_inst; alg = crypto_spawn_aead_alg(&ctx->spawn); err = pcrypt_init_instance(aead_crypto_instance(inst), &alg->base); if (err) - goto out_drop_aead; + goto err_free_inst; inst->alg.base.cra_flags = CRYPTO_ALG_ASYNC; @@ -286,21 +281,11 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb, inst->free = pcrypt_free; err = aead_register_instance(tmpl, inst); - if (err) - goto out_drop_aead; - -out: + if (err) { +err_free_inst: + pcrypt_free(inst); + } return err; - -out_drop_aead: - crypto_drop_aead(&ctx->spawn); -out_free_psdec: - padata_free_shell(ctx->psdec); -out_free_psenc: - padata_free_shell(ctx->psenc); -out_free_inst: - kfree(inst); - goto out; } static int pcrypt_create(struct crypto_template *tmpl, struct rtattr **tb) diff --git a/kernel/padata.c b/kernel/padata.c index 62082597d4a2..a6afa12fb75e 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -1038,12 +1038,13 @@ EXPORT_SYMBOL(padata_alloc_shell); */ void padata_free_shell(struct padata_shell *ps) { - struct padata_instance *pinst = ps->pinst; + if (!ps) + return; - mutex_lock(&pinst->lock); + mutex_lock(&ps->pinst->lock); list_del(&ps->list); padata_free_pd(rcu_dereference_protected(ps->pd, 1)); - mutex_unlock(&pinst->lock); + mutex_unlock(&ps->pinst->lock); kfree(ps); } From 0708bb435354aede92819d44563d3113c7180cf0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 25 Feb 2020 20:59:23 -0800 Subject: [PATCH 105/162] crypto: rsa-pkcs1pad - simplify error handling in pkcs1pad_create() Simplify the error handling in pkcs1pad_create() by taking advantage of crypto_grab_akcipher() now handling an ERR_PTR() name and by taking advantage of crypto_drop_akcipher() now accepting (as a no-op) a spawn that hasn't been grabbed yet. While we're at it, also simplify the way the hash_name optional argument is handled. We only need to check whether it's present in one place, and we can just assign directly to ctx->digest_info. 
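The optional-argument handling now reduces to one IS_ERR() test at the point of use; schematically (condensed from pkcs1pad_create(), with a hypothetical helper name):

/*
 * tb[2] is the optional hash argument. When it is absent,
 * crypto_attr_alg_name() returns an ERR_PTR() and the template takes the
 * "pkcs1pad(rsa)" form; otherwise the digest info is looked up once and
 * assigned directly.
 */
static int sketch_pick_digest(struct pkcs1pad_inst_ctx *ctx,
			      struct rtattr **tb)
{
	const char *hash_name = crypto_attr_alg_name(tb[2]);

	if (IS_ERR(hash_name))
		return 0;				/* pkcs1pad(rsa) */

	ctx->digest_info = rsa_lookup_asn1(hash_name);	/* pkcs1pad(rsa,sha256) */
	return ctx->digest_info ? 0 : -EINVAL;
}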
Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/rsa-pkcs1pad.c | 59 +++++++++++++++---------------------------- 1 file changed, 20 insertions(+), 39 deletions(-) diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 176b63afec8d..d31031de51bc 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -596,14 +596,11 @@ static void pkcs1pad_free(struct akcipher_instance *inst) static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) { - const struct rsa_asn1_template *digest_info; struct crypto_attr_type *algt; u32 mask; struct akcipher_instance *inst; struct pkcs1pad_inst_ctx *ctx; - struct crypto_akcipher_spawn *spawn; struct akcipher_alg *rsa_alg; - const char *rsa_alg_name; const char *hash_name; int err; @@ -616,60 +613,49 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) mask = crypto_requires_sync(algt->type, algt->mask); - rsa_alg_name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(rsa_alg_name)) - return PTR_ERR(rsa_alg_name); - - hash_name = crypto_attr_alg_name(tb[2]); - if (IS_ERR(hash_name)) - hash_name = NULL; - - if (hash_name) { - digest_info = rsa_lookup_asn1(hash_name); - if (!digest_info) - return -EINVAL; - } else - digest_info = NULL; - inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); if (!inst) return -ENOMEM; ctx = akcipher_instance_ctx(inst); - spawn = &ctx->spawn; - ctx->digest_info = digest_info; - err = crypto_grab_akcipher(spawn, akcipher_crypto_instance(inst), - rsa_alg_name, 0, mask); + err = crypto_grab_akcipher(&ctx->spawn, akcipher_crypto_instance(inst), + crypto_attr_alg_name(tb[1]), 0, mask); if (err) - goto out_free_inst; + goto err_free_inst; - rsa_alg = crypto_spawn_akcipher_alg(spawn); + rsa_alg = crypto_spawn_akcipher_alg(&ctx->spawn); err = -ENAMETOOLONG; - - if (!hash_name) { + hash_name = crypto_attr_alg_name(tb[2]); + if (IS_ERR(hash_name)) { if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)", rsa_alg->base.cra_name) >= CRYPTO_MAX_ALG_NAME) - goto out_drop_alg; + goto err_free_inst; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)", rsa_alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) - goto out_drop_alg; + goto err_free_inst; } else { + ctx->digest_info = rsa_lookup_asn1(hash_name); + if (!ctx->digest_info) { + err = -EINVAL; + goto err_free_inst; + } + if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s,%s)", rsa_alg->base.cra_name, hash_name) >= CRYPTO_MAX_ALG_NAME) - goto out_drop_alg; + goto err_free_inst; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s,%s)", rsa_alg->base.cra_driver_name, hash_name) >= CRYPTO_MAX_ALG_NAME) - goto out_drop_alg; + goto err_free_inst; } inst->alg.base.cra_flags = rsa_alg->base.cra_flags & CRYPTO_ALG_ASYNC; @@ -691,15 +677,10 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) inst->free = pkcs1pad_free; err = akcipher_register_instance(tmpl, inst); - if (err) - goto out_drop_alg; - - return 0; - -out_drop_alg: - crypto_drop_akcipher(spawn); -out_free_inst: - kfree(inst); + if (err) { +err_free_inst: + pkcs1pad_free(inst); + } return err; } From 732e540953477083082e999ff553622c59cffd5f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 25 Feb 2020 20:59:24 -0800 Subject: [PATCH 106/162] crypto: xts - simplify error handling in ->create() Simplify the error handling in the XTS template's ->create() function by taking advantage of crypto_drop_skcipher() now accepting (as a no-op) a spawn 
that hasn't been grabbed yet. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/xts.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/crypto/xts.c b/crypto/xts.c index 29efa15f1495..dbdd8af629e6 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -379,15 +379,15 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) err = -EINVAL; if (alg->base.cra_blocksize != XTS_BLOCK_SIZE) - goto err_drop_spawn; + goto err_free_inst; if (crypto_skcipher_alg_ivsize(alg)) - goto err_drop_spawn; + goto err_free_inst; err = crypto_inst_setname(skcipher_crypto_instance(inst), "xts", &alg->base); if (err) - goto err_drop_spawn; + goto err_free_inst; err = -EINVAL; cipher_name = alg->base.cra_name; @@ -400,20 +400,20 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) len = strlcpy(ctx->name, cipher_name + 4, sizeof(ctx->name)); if (len < 2 || len >= sizeof(ctx->name)) - goto err_drop_spawn; + goto err_free_inst; if (ctx->name[len - 1] != ')') - goto err_drop_spawn; + goto err_free_inst; ctx->name[len - 1] = 0; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "xts(%s)", ctx->name) >= CRYPTO_MAX_ALG_NAME) { err = -ENAMETOOLONG; - goto err_drop_spawn; + goto err_free_inst; } } else - goto err_drop_spawn; + goto err_free_inst; inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; inst->alg.base.cra_priority = alg->base.cra_priority; @@ -437,17 +437,11 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) inst->free = free; err = skcipher_register_instance(tmpl, inst); - if (err) - goto err_drop_spawn; - -out: - return err; - -err_drop_spawn: - crypto_drop_skcipher(&ctx->spawn); + if (err) { err_free_inst: - kfree(inst); - goto out; + free(inst); + } + return err; } static struct crypto_template crypto_tmpl = { From acc670dba9f57298a6afd7cdfc9a7254292bbec9 Mon Sep 17 00:00:00 2001 From: Zhangfei Gao Date: Wed, 26 Feb 2020 15:12:06 +0800 Subject: [PATCH 107/162] uacce: unmap remaining mappings from user space When the uacce parent device module is removed, a user application may still hold the mmapped area, which can then be accessed unsafely. On rmmod, the parent device driver calls uacce_remove(), which unmaps all remaining mappings from user space for safety. VM_FAULT_SIGBUS is also reported to user space accordingly.
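The zap itself is one call; as a sketch of the convention it uses (a hypothetical wrapper, the driver calls it inline):

#include <linux/mm.h>

/*
 * holebegin == 0 together with holelen == 0 means "from offset 0 to
 * EOF", i.e. every user mapping of this file; even_cows == 1 also drops
 * private COW copies. Later accesses fault into uacce_vma_fault()
 * instead of touching resources of the removed parent device.
 */
static void sketch_zap_user_mappings(struct inode *inode)
{
	unmap_mapping_range(inode->i_mapping, 0, 0, 1);
}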
Suggested-by: Dave Jiang Signed-off-by: Zhangfei Gao Signed-off-by: Herbert Xu --- drivers/misc/uacce/uacce.c | 16 ++++++++++++++++ include/linux/uacce.h | 2 ++ 2 files changed, 18 insertions(+) diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c index ffced4da3c2c..d39307f060bd 100644 --- a/drivers/misc/uacce/uacce.c +++ b/drivers/misc/uacce/uacce.c @@ -224,6 +224,7 @@ static int uacce_fops_open(struct inode *inode, struct file *filep) init_waitqueue_head(&q->wait); filep->private_data = q; + uacce->inode = inode; q->state = UACCE_Q_INIT; return 0; @@ -253,6 +254,14 @@ static int uacce_fops_release(struct inode *inode, struct file *filep) return 0; } +static vm_fault_t uacce_vma_fault(struct vm_fault *vmf) +{ + if (vmf->flags & (FAULT_FLAG_MKWRITE | FAULT_FLAG_WRITE)) + return VM_FAULT_SIGBUS; + + return 0; +} + static void uacce_vma_close(struct vm_area_struct *vma) { struct uacce_queue *q = vma->vm_private_data; @@ -265,6 +274,7 @@ static void uacce_vma_close(struct vm_area_struct *vma) } static const struct vm_operations_struct uacce_vm_ops = { + .fault = uacce_vma_fault, .close = uacce_vma_close, }; @@ -556,6 +566,12 @@ void uacce_remove(struct uacce_device *uacce) if (!uacce) return; + /* + * unmap remaining mapping from user space, preventing user still + * access the mmaped area while parent device is already removed + */ + if (uacce->inode) + unmap_mapping_range(uacce->inode->i_mapping, 0, 0, 1); /* ensure no open queue remains */ mutex_lock(&uacce->mm_lock); diff --git a/include/linux/uacce.h b/include/linux/uacce.h index 904a461591a3..0e215e6d0534 100644 --- a/include/linux/uacce.h +++ b/include/linux/uacce.h @@ -98,6 +98,7 @@ struct uacce_queue { * @priv: private pointer of the uacce * @mm_list: list head of uacce_mm->list * @mm_lock: lock for mm_list + * @inode: core vfs */ struct uacce_device { const char *algs; @@ -113,6 +114,7 @@ struct uacce_device { void *priv; struct list_head mm_list; struct mutex mm_lock; + struct inode *inode; }; /** From 47c16b4499213b8173ad3c033a41d850c6fa0e7c Mon Sep 17 00:00:00 2001 From: Hongbo Yao Date: Thu, 27 Feb 2020 15:12:28 +0800 Subject: [PATCH 108/162] crypto: hisilicon - qm depends on UACCE If UACCE=m and CRYPTO_DEV_HISI_QM=y, the following error is seen while building qm.o: drivers/crypto/hisilicon/qm.o: In function `hisi_qm_init': (.text+0x23c6): undefined reference to `uacce_alloc' (.text+0x2474): undefined reference to `uacce_remove' (.text+0x286b): undefined reference to `uacce_remove' drivers/crypto/hisilicon/qm.o: In function `hisi_qm_uninit': (.text+0x2918): undefined reference to `uacce_remove' make[1]: *** [vmlinux] Error 1 make: *** [autoksyms_recursive] Error 2 This patch fixes the config dependency for QM and ZIP. reported-by: Hulk Robot Suggested-by: Herbert Xu Signed-off-by: Hongbo Yao Signed-off-by: Zhou Wang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig index 8851161f722f..095850d01dcc 100644 --- a/drivers/crypto/hisilicon/Kconfig +++ b/drivers/crypto/hisilicon/Kconfig @@ -40,6 +40,7 @@ config CRYPTO_DEV_HISI_QM tristate depends on ARM64 || COMPILE_TEST depends on PCI && PCI_MSI + depends on UACCE || UACCE=n help HiSilicon accelerator engines use a common queue management interface. Specific engine driver may use this module. 
@@ -49,6 +50,7 @@ config CRYPTO_DEV_HISI_ZIP depends on PCI && PCI_MSI depends on ARM64 || (COMPILE_TEST && 64BIT) depends on !CPU_BIG_ENDIAN || COMPILE_TEST + depends on UACCE || UACCE=n select CRYPTO_DEV_HISI_QM help Support for HiSilicon ZIP Driver From a83880068845d4973cdb7aeb52f870c5ce5b146c Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Fri, 28 Feb 2020 11:39:40 +0530 Subject: [PATCH 109/162] crypto: chelsio/chtls - Fixed boolinit.cocci warning crypto: chtls - Fixed boolinit.cocci warning Signed-off-by: Vinay Kumar Yadav Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chtls/chtls_io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c index 781fe7c55a27..bd47119f4fda 100644 --- a/drivers/crypto/chelsio/chtls/chtls_io.c +++ b/drivers/crypto/chelsio/chtls/chtls_io.c @@ -1081,10 +1081,10 @@ new_buf: pg_size = page_size(page); if (off < pg_size && skb_can_coalesce(skb, i, page, off)) { - merge = 1; + merge = true; goto copy; } - merge = 0; + merge = false; if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) : MAX_SKB_FRAGS)) goto new_buf; From 3a5a9e1ef37b030b836d92df8264f840988f4a38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Fri, 28 Feb 2020 08:51:23 +0200 Subject: [PATCH 110/162] crypto: caam/qi2 - fix chacha20 data size error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HW generates a Data Size error for chacha20 requests that are not a multiple of 64B, since algorithm state (AS) does not have the FINAL bit set. Since updating req->iv (for chaining) is not required, modify skcipher descriptors to set the FINAL bit for chacha20. [Note that for skcipher decryption we know that ctx1_iv_off is 0, which allows for an optimization by not checking algorithm type, since append_dec_op1() sets FINAL bit for all algorithms except AES.] Also drop the descriptor operations that save the IV. However, in order to keep code logic simple, things like S/G tables generation etc. are not touched. 
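For context, the encap change amounts to one extra bit in the operation word; as a sketch of the logic in the hunk below (per the CAAM descriptor definitions, OP_ALG_AS_INIT ORed with OP_ALG_AS_FINALIZE encodes the self-contained INITFINAL algorithm state):

u32 options = cdata->algtype | OP_ALG_AS_INIT | OP_ALG_ENCRYPT;

if ((cdata->algtype & OP_ALG_ALGSEL_MASK) == OP_ALG_ALGSEL_CHACHA20)
	options |= OP_ALG_AS_FINALIZE;	/* no chaining: finalize in one pass */
append_operation(desc, options);

With FINAL set, the CHA accepts a trailing partial block instead of raising a Data Size error when the input is not a 64-byte multiple.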
Cc: # v5.3+ Fixes: 334d37c9e263 ("crypto: caam - update IV using HW support") Signed-off-by: Horia Geantă Tested-by: Valentin Ciocoi Radulescu Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg_desc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c index aa9ccca67045..6171a8118b5a 100644 --- a/drivers/crypto/caam/caamalg_desc.c +++ b/drivers/crypto/caam/caamalg_desc.c @@ -1379,6 +1379,9 @@ void cnstr_shdsc_skcipher_encap(u32 * const desc, struct alginfo *cdata, const u32 ctx1_iv_off) { u32 *key_jump_cmd; + u32 options = cdata->algtype | OP_ALG_AS_INIT | OP_ALG_ENCRYPT; + bool is_chacha20 = ((cdata->algtype & OP_ALG_ALGSEL_MASK) == + OP_ALG_ALGSEL_CHACHA20); init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); /* Skip if already shared */ @@ -1417,14 +1420,15 @@ void cnstr_shdsc_skcipher_encap(u32 * const desc, struct alginfo *cdata, LDST_OFFSET_SHIFT)); /* Load operation */ - append_operation(desc, cdata->algtype | OP_ALG_AS_INIT | - OP_ALG_ENCRYPT); + if (is_chacha20) + options |= OP_ALG_AS_FINALIZE; + append_operation(desc, options); /* Perform operation */ skcipher_append_src_dst(desc); /* Store IV */ - if (ivsize) + if (!is_chacha20 && ivsize) append_seq_store(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT)); @@ -1451,6 +1455,8 @@ void cnstr_shdsc_skcipher_decap(u32 * const desc, struct alginfo *cdata, const u32 ctx1_iv_off) { u32 *key_jump_cmd; + bool is_chacha20 = ((cdata->algtype & OP_ALG_ALGSEL_MASK) == + OP_ALG_ALGSEL_CHACHA20); init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); /* Skip if already shared */ @@ -1499,7 +1505,7 @@ void cnstr_shdsc_skcipher_decap(u32 * const desc, struct alginfo *cdata, skcipher_append_src_dst(desc); /* Store IV */ - if (ivsize) + if (!is_chacha20 && ivsize) append_seq_store(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT)); From 3f142b6a7b573bde6cff926f246da05652c61eb4 Mon Sep 17 00:00:00 2001 From: Andrei Botila Date: Fri, 28 Feb 2020 12:46:48 +0200 Subject: [PATCH 111/162] crypto: caam - update xts sector size for large input length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since in the software implementation of XTS-AES there is no notion of sector every input length is processed the same way. CAAM implementation has the notion of sector which causes different results between the software implementation and the one in CAAM for input lengths bigger than 512 bytes. Increase sector size to maximum value on 16 bits. Fixes: c6415a6016bf ("crypto: caam - add support for acipher xts(aes)") Cc: # v4.12+ Signed-off-by: Andrei Botila Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg_desc.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c index 6171a8118b5a..d6c58184bb57 100644 --- a/drivers/crypto/caam/caamalg_desc.c +++ b/drivers/crypto/caam/caamalg_desc.c @@ -1524,7 +1524,13 @@ EXPORT_SYMBOL(cnstr_shdsc_skcipher_decap); */ void cnstr_shdsc_xts_skcipher_encap(u32 * const desc, struct alginfo *cdata) { - __be64 sector_size = cpu_to_be64(512); + /* + * Set sector size to a big value, practically disabling + * sector size segmentation in xts implementation. We cannot + * take full advantage of this HW feature with existing + * crypto API / dm-crypt SW architecture. 
+ */ + __be64 sector_size = cpu_to_be64(BIT(15)); u32 *key_jump_cmd; init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); @@ -1577,7 +1583,13 @@ EXPORT_SYMBOL(cnstr_shdsc_xts_skcipher_encap); */ void cnstr_shdsc_xts_skcipher_decap(u32 * const desc, struct alginfo *cdata) { - __be64 sector_size = cpu_to_be64(512); + /* + * Set sector size to a big value, practically disabling + * sector size segmentation in xts implementation. We cannot + * take full advantage of this HW feature with existing + * crypto API / dm-crypt SW architecture. + */ + __be64 sector_size = cpu_to_be64(BIT(15)); u32 *key_jump_cmd; init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); From dc7fc3a53ae158263196b1892b672aedf67796c5 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 1 Mar 2020 16:06:56 +0800 Subject: [PATCH 112/162] crypto: x86/curve25519 - leave r12 as spare register This updates to the newer register selection proved by HACL*, which leads to a more compact instruction encoding, and saves around 100 cycles. Signed-off-by: Jason A. Donenfeld Signed-off-by: Herbert Xu --- arch/x86/crypto/curve25519-x86_64.c | 110 ++++++++++++++-------------- 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c index e4e58b8e9afe..8a17621f7d3a 100644 --- a/arch/x86/crypto/curve25519-x86_64.c +++ b/arch/x86/crypto/curve25519-x86_64.c @@ -167,28 +167,28 @@ static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) " movq 0(%1), %%rdx;" " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);" " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);" - " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" " adox %%rdx, %%rax;" /* Compute src1[1] * src2 */ " movq 8(%1), %%rdx;" " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);" - " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);" + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" /* Compute src1[2] * src2 */ " movq 16(%1), %%rdx;" " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);" - " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);" + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" /* Compute src1[3] * src2 */ " movq 24(%1), %%rdx;" " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);" - " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov 
$0, %%r8;" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);" + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;" " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;" " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);" /* Line up pointers */ @@ -202,11 +202,11 @@ static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) " mulxq 32(%1), %%r8, %%r13;" " xor %3, %3;" " adoxq 0(%1), %%r8;" - " mulxq 40(%1), %%r9, %%r12;" + " mulxq 40(%1), %%r9, %%rbx;" " adcx %%r13, %%r9;" " adoxq 8(%1), %%r9;" " mulxq 48(%1), %%r10, %%r13;" - " adcx %%r12, %%r10;" + " adcx %%rbx, %%r10;" " adoxq 16(%1), %%r10;" " mulxq 56(%1), %%r11, %%rax;" " adcx %%r13, %%r11;" @@ -231,7 +231,7 @@ static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) " movq %%r8, 0(%0);" : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2) : - : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc" + : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc" ); } @@ -248,28 +248,28 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) " movq 0(%1), %%rdx;" " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);" " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);" - " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" " adox %%rdx, %%rax;" /* Compute src1[1] * src2 */ " movq 8(%1), %%rdx;" " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);" - " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);" + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" /* Compute src1[2] * src2 */ " movq 16(%1), %%rdx;" " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);" - " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);" + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" /* Compute src1[3] * src2 */ " movq 24(%1), %%rdx;" " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);" - " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;" + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);" + " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 
40(%0);" " mov $0, %%r8;" " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;" " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);" @@ -279,28 +279,28 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) " movq 32(%1), %%rdx;" " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);" " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);" - " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" + " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" " adox %%rdx, %%rax;" /* Compute src1[1] * src2 */ " movq 40(%1), %%rdx;" " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);" - " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 80(%0);" - " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 80(%0);" + " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" /* Compute src1[2] * src2 */ " movq 48(%1), %%rdx;" " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);" - " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 88(%0);" - " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;" + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 88(%0);" + " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" /* Compute src1[3] * src2 */ " movq 56(%1), %%rdx;" " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);" - " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 96(%0);" - " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 104(%0);" " mov $0, %%r8;" + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 96(%0);" + " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 104(%0);" " mov $0, %%r8;" " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;" " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);" /* Line up pointers */ @@ -314,11 +314,11 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) " mulxq 32(%1), %%r8, %%r13;" " xor %3, %3;" " adoxq 0(%1), %%r8;" - " mulxq 40(%1), %%r9, %%r12;" + " mulxq 40(%1), %%r9, %%rbx;" " adcx %%r13, %%r9;" " adoxq 8(%1), %%r9;" " mulxq 48(%1), %%r10, %%r13;" - " adcx %%r12, %%r10;" + " adcx %%rbx, %%r10;" " adoxq 16(%1), %%r10;" " mulxq 56(%1), %%r11, %%rax;" " adcx %%r13, %%r11;" @@ -347,11 +347,11 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) " mulxq 96(%1), %%r8, %%r13;" " xor %3, %3;" " adoxq 64(%1), %%r8;" - " mulxq 104(%1), %%r9, %%r12;" + " mulxq 104(%1), %%r9, %%rbx;" " adcx %%r13, %%r9;" " adoxq 72(%1), %%r9;" " mulxq 112(%1), %%r10, %%r13;" - " adcx %%r12, 
%%r10;" + " adcx %%rbx, %%r10;" " adoxq 80(%1), %%r10;" " mulxq 120(%1), %%r11, %%rax;" " adcx %%r13, %%r11;" @@ -376,7 +376,7 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) " movq %%r8, 32(%0);" : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2) : - : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc" + : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc" ); } @@ -388,11 +388,11 @@ static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2) asm volatile( /* Compute the raw multiplication of f1*f2 */ " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */ - " mulxq 8(%2), %%r9, %%r12;" /* f1[1]*f2 */ + " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */ " add %%rcx, %%r9;" " mov $0, %%rcx;" " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */ - " adcx %%r12, %%r10;" + " adcx %%rbx, %%r10;" " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */ " adcx %%r13, %%r11;" " adcx %%rcx, %%rax;" @@ -419,7 +419,7 @@ static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2) " movq %%r8, 0(%1);" : "+&r" (f2_r) : "r" (out), "r" (f1) - : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "memory", "cc" + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "memory", "cc" ); } @@ -520,8 +520,8 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ " movq 24(%1), %%rdx;" /* f[3] */ - " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ - " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ + " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */ " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ @@ -531,12 +531,12 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) " adcx %%r8, %%r8;" " adox %%rcx, %%r11;" " adcx %%r9, %%r9;" - " adox %%r15, %%r12;" + " adox %%r15, %%rbx;" " adcx %%r10, %%r10;" " adox %%r15, %%r13;" " adcx %%r11, %%r11;" " adox %%r15, %%r14;" - " adcx %%r12, %%r12;" + " adcx %%rbx, %%rbx;" " adcx %%r13, %%r13;" " adcx %%r14, %%r14;" @@ -549,7 +549,7 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);" " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ " adcx %%rax, %%r11;" " movq %%r11, 32(%0);" - " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);" + " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);" " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ " adcx %%rax, %%r13;" " movq %%r13, 48(%0);" " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);" @@ -565,11 +565,11 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) " mulxq 32(%1), %%r8, %%r13;" " xor %%rcx, %%rcx;" " adoxq 0(%1), %%r8;" - " mulxq 40(%1), %%r9, %%r12;" + " mulxq 40(%1), %%r9, %%rbx;" " adcx %%r13, %%r9;" " adoxq 8(%1), %%r9;" " mulxq 48(%1), %%r10, %%r13;" - " adcx %%r12, %%r10;" + " adcx %%rbx, %%r10;" " adoxq 16(%1), %%r10;" " mulxq 56(%1), %%r11, %%rax;" " adcx %%r13, %%r11;" @@ -594,7 +594,7 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) " movq %%r8, 0(%0);" : "+&r" (tmp), "+&r" (f), "+&r" (out) : - : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc" + : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc" ); } @@ 
-611,8 +611,8 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ " movq 24(%1), %%rdx;" /* f[3] */ - " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ - " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ + " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */ " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ @@ -622,12 +622,12 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) " adcx %%r8, %%r8;" " adox %%rcx, %%r11;" " adcx %%r9, %%r9;" - " adox %%r15, %%r12;" + " adox %%r15, %%rbx;" " adcx %%r10, %%r10;" " adox %%r15, %%r13;" " adcx %%r11, %%r11;" " adox %%r15, %%r14;" - " adcx %%r12, %%r12;" + " adcx %%rbx, %%rbx;" " adcx %%r13, %%r13;" " adcx %%r14, %%r14;" @@ -640,7 +640,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);" " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ " adcx %%rax, %%r11;" " movq %%r11, 32(%0);" - " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);" + " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);" " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ " adcx %%rax, %%r13;" " movq %%r13, 48(%0);" " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);" @@ -651,8 +651,8 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) " mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ " mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ " movq 56(%1), %%rdx;" /* f[3] */ - " mulxq 40(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ - " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */ + " mulxq 40(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */ " movq 40(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ " mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ @@ -662,12 +662,12 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) " adcx %%r8, %%r8;" " adox %%rcx, %%r11;" " adcx %%r9, %%r9;" - " adox %%r15, %%r12;" + " adox %%r15, %%rbx;" " adcx %%r10, %%r10;" " adox %%r15, %%r13;" " adcx %%r11, %%r11;" " adox %%r15, %%r14;" - " adcx %%r12, %%r12;" + " adcx %%rbx, %%rbx;" " adcx %%r13, %%r13;" " adcx %%r14, %%r14;" @@ -680,7 +680,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) " adcx %%rcx, %%r10;" " movq %%r10, 88(%0);" " movq 48(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ " adcx %%rax, %%r11;" " movq %%r11, 96(%0);" - " adcx %%rcx, %%r12;" " movq %%r12, 104(%0);" + " adcx %%rcx, %%rbx;" " movq %%rbx, 104(%0);" " movq 56(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ " adcx %%rax, %%r13;" " movq %%r13, 112(%0);" " adcx %%rcx, %%r14;" " movq %%r14, 120(%0);" @@ -694,11 +694,11 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) " mulxq 32(%1), %%r8, %%r13;" " xor %%rcx, %%rcx;" " adoxq 0(%1), %%r8;" - " mulxq 40(%1), %%r9, %%r12;" + " mulxq 40(%1), %%r9, %%rbx;" " adcx %%r13, %%r9;" " adoxq 8(%1), %%r9;" " mulxq 48(%1), %%r10, %%r13;" - " adcx %%r12, %%r10;" + " adcx %%rbx, %%r10;" " adoxq 16(%1), %%r10;" " mulxq 56(%1), %%r11, %%rax;" " adcx %%r13, %%r11;" @@ -727,11 +727,11 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) " mulxq 96(%1), %%r8, %%r13;" " xor %%rcx, 
%%rcx;" " adoxq 64(%1), %%r8;" - " mulxq 104(%1), %%r9, %%r12;" + " mulxq 104(%1), %%r9, %%rbx;" " adcx %%r13, %%r9;" " adoxq 72(%1), %%r9;" " mulxq 112(%1), %%r10, %%r13;" - " adcx %%r12, %%r10;" + " adcx %%rbx, %%r10;" " adoxq 80(%1), %%r10;" " mulxq 120(%1), %%r11, %%rax;" " adcx %%r13, %%r11;" @@ -756,7 +756,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) " movq %%r8, 32(%0);" : "+&r" (tmp), "+&r" (f), "+&r" (out) : - : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc" + : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc" ); } From 7548bf8c17d84607c106bd45d81834afd95a2edb Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 2 Mar 2020 00:37:14 +0100 Subject: [PATCH 113/162] crypto: arm/ghash-ce - define fpu before fpu registers are referenced Building ARMv7 with Clang's integrated assembler leads to errors such as: arch/arm/crypto/ghash-ce-core.S:34:11: error: register name expected t3l .req d16 ^ Since no FPU has selected yet Clang considers d16 not a valid register. Moving the FPU directive on-top allows Clang to parse the registers and allows to successfully build this file with Clang's integrated assembler. Signed-off-by: Stefan Agner Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/ghash-ce-core.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S index 534c9647726d..9f51e3fa4526 100644 --- a/arch/arm/crypto/ghash-ce-core.S +++ b/arch/arm/crypto/ghash-ce-core.S @@ -8,6 +8,9 @@ #include #include + .arch armv8-a + .fpu crypto-neon-fp-armv8 + SHASH .req q0 T1 .req q1 XL .req q2 @@ -88,8 +91,6 @@ T3_H .req d17 .text - .arch armv8-a - .fpu crypto-neon-fp-armv8 .macro __pmull_p64, rd, rn, rm, b1, b2, b3, b4 vmull.p64 \rd, \rn, \rm From 1f14b57ffc0adcbba0cf35214b0a6129ceae9d09 Mon Sep 17 00:00:00 2001 From: John Allen Date: Tue, 3 Mar 2020 07:57:23 -0600 Subject: [PATCH 114/162] crypto: ccp - Cleanup misc_dev on sev_exit() Explicitly free and clear misc_dev in sev_exit(). Since devm_kzalloc() associates misc_dev with the first device that gets probed, change from devm_kzalloc() to kzalloc() and explicitly free memory in sev_exit() as the first device probed is not guaranteed to be the last device released. To ensure that the variable gets properly set to NULL, remove the local definition of misc_dev. 
Fixes: 200664d5237f ("crypto: ccp: Add Secure Encrypted Virtualization (SEV) command support") Signed-off-by: John Allen Acked-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sev-dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index e467860f797d..aa591dae067c 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -896,9 +896,9 @@ EXPORT_SYMBOL_GPL(sev_guest_df_flush); static void sev_exit(struct kref *ref) { - struct sev_misc_dev *misc_dev = container_of(ref, struct sev_misc_dev, refcount); - misc_deregister(&misc_dev->misc); + kfree(misc_dev); + misc_dev = NULL; } static int sev_misc_init(struct sev_device *sev) @@ -916,7 +916,7 @@ static int sev_misc_init(struct sev_device *sev) if (!misc_dev) { struct miscdevice *misc; - misc_dev = devm_kzalloc(dev, sizeof(*misc_dev), GFP_KERNEL); + misc_dev = kzalloc(sizeof(*misc_dev), GFP_KERNEL); if (!misc_dev) return -ENOMEM; From 15f7a4c65293d188651e21a9c5ab6bd8f99a1459 Mon Sep 17 00:00:00 2001 From: John Allen Date: Tue, 3 Mar 2020 07:57:24 -0600 Subject: [PATCH 115/162] crypto: ccp - Cleanup sp_dev_master in psp_dev_destroy() Introduce clear_psp_master_device() to ensure that sp_dev_master gets properly cleared on the release of a psp device. Fixes: 2a6170dfe755 ("crypto: ccp: Add Platform Security Processor (PSP) device support") Signed-off-by: John Allen Acked-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/psp-dev.c | 3 +++ drivers/crypto/ccp/sp-dev.h | 1 + drivers/crypto/ccp/sp-pci.c | 9 +++++++++ 3 files changed, 13 insertions(+) diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c index e95e7aa5dbf1..ae7b44599914 100644 --- a/drivers/crypto/ccp/psp-dev.c +++ b/drivers/crypto/ccp/psp-dev.c @@ -215,6 +215,9 @@ void psp_dev_destroy(struct sp_device *sp) tee_dev_destroy(psp); sp_free_psp_irq(sp, psp); + + if (sp->clear_psp_master_device) + sp->clear_psp_master_device(sp); } void psp_set_sev_irq_handler(struct psp_device *psp, psp_irq_handler_t handler, diff --git a/drivers/crypto/ccp/sp-dev.h b/drivers/crypto/ccp/sp-dev.h index 423594608ad1..f913f1494af9 100644 --- a/drivers/crypto/ccp/sp-dev.h +++ b/drivers/crypto/ccp/sp-dev.h @@ -90,6 +90,7 @@ struct sp_device { /* get and set master device */ struct sp_device*(*get_psp_master_device)(void); void (*set_psp_master_device)(struct sp_device *); + void (*clear_psp_master_device)(struct sp_device *); bool irq_registered; bool use_tasklet; diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c index 56c1f61c0f84..cb6cb47053f4 100644 --- a/drivers/crypto/ccp/sp-pci.c +++ b/drivers/crypto/ccp/sp-pci.c @@ -146,6 +146,14 @@ static struct sp_device *psp_get_master(void) return sp_dev_master; } +static void psp_clear_master(struct sp_device *sp) +{ + if (sp == sp_dev_master) { + sp_dev_master = NULL; + dev_dbg(sp->dev, "Cleared sp_dev_master\n"); + } +} + static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct sp_device *sp; @@ -206,6 +214,7 @@ static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_master(pdev); sp->set_psp_master_device = psp_set_master; sp->get_psp_master_device = psp_get_master; + sp->clear_psp_master_device = psp_clear_master; ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); if (ret) { From 542b7ffee3e26ffef5d63e7b5ab7602129daeaad Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Tue, 3 Mar 2020 23:48:09 +0800 Subject: [PATCH 
116/162] crypto: qat - simplify the qat_crypto function simplify code to remove unnecessary constant string copies. Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_crypto.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c index 3852d31ce0a4..fb504cee0305 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.c +++ b/drivers/crypto/qat/qat_common/qat_crypto.c @@ -250,8 +250,7 @@ static int qat_crypto_create_instances(struct adf_accel_dev *accel_dev) char val[ADF_CFG_MAX_VAL_LEN_IN_BYTES]; INIT_LIST_HEAD(&accel_dev->crypto_list); - strlcpy(key, ADF_NUM_CY, sizeof(key)); - if (adf_cfg_get_param_value(accel_dev, SEC, key, val)) + if (adf_cfg_get_param_value(accel_dev, SEC, ADF_NUM_CY, val)) return -EFAULT; if (kstrtoul(val, 0, &num_inst)) From d069b20403d7fe5585d38d2c04b2d429fc95f3e6 Mon Sep 17 00:00:00 2001 From: Eneas U de Queiroz Date: Wed, 4 Mar 2020 15:24:55 -0300 Subject: [PATCH 117/162] crypto: qce - fix wrong config symbol reference The CONFIG_CRYPTO_DEV_QCE_SOFT_THRESHOLD symbol was renamed during development, but the stringify reference in the parameter description sneaked by unnoticed. Signed-off-by: Eneas U de Queiroz Signed-off-by: Herbert Xu --- drivers/crypto/qce/skcipher.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/qce/skcipher.c b/drivers/crypto/qce/skcipher.c index a4f6ec1b64c7..9412433f3b21 100644 --- a/drivers/crypto/qce/skcipher.c +++ b/drivers/crypto/qce/skcipher.c @@ -18,7 +18,7 @@ module_param(aes_sw_max_len, uint, 0644); MODULE_PARM_DESC(aes_sw_max_len, "Only use hardware for AES requests larger than this " "[0=always use hardware; anything <16 breaks AES-GCM; default=" - __stringify(CONFIG_CRYPTO_DEV_QCE_SOFT_THRESHOLD)"]"); + __stringify(CONFIG_CRYPTO_DEV_QCE_SW_MAX_LEN)"]"); static LIST_HEAD(skcipher_algs); From 6f3a06d959f4f38f2beb0a8f25b0e2d5c9792b18 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 4 Mar 2020 14:44:03 -0800 Subject: [PATCH 118/162] crypto: testmgr - use consistent IV copies for AEADs that need it rfc4543 was missing from the list of algorithms that may treat the end of the AAD buffer specially. Also, with rfc4106, rfc4309, rfc4543, and rfc7539esp, the end of the AAD buffer is actually supposed to contain a second copy of the IV, and we've concluded that if the IV copies don't match the behavior is implementation-defined. So, the fuzz tests can't easily test that case. So, make the fuzz tests only use inputs where the two IV copies match. Reported-by: Geert Uytterhoeven Fixes: 40153b10d91c ("crypto: testmgr - fuzz AEADs against their generic implementation") Cc: Stephan Mueller Originally-from: Gilad Ben-Yossef Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/testmgr.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 88f33c0efb23..0a10dbde27ef 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -91,10 +91,11 @@ struct aead_test_suite { unsigned int einval_allowed : 1; /* - * Set if the algorithm intentionally ignores the last 8 bytes of the - * AAD buffer during decryption. + * Set if this algorithm requires that the IV be located at the end of + * the AAD buffer, in addition to being given in the normal way. The + * behavior when the two IV copies differ is implementation-defined. 
*/ - unsigned int esp_aad : 1; + unsigned int aad_iv : 1; }; struct cipher_test_suite { @@ -2167,9 +2168,10 @@ struct aead_extra_tests_ctx { * here means the full ciphertext including the authentication tag. The * authentication tag (and hence also the ciphertext) is assumed to be nonempty. */ -static void mutate_aead_message(struct aead_testvec *vec, bool esp_aad) +static void mutate_aead_message(struct aead_testvec *vec, bool aad_iv, + unsigned int ivsize) { - const unsigned int aad_tail_size = esp_aad ? 8 : 0; + const unsigned int aad_tail_size = aad_iv ? ivsize : 0; const unsigned int authsize = vec->clen - vec->plen; if (prandom_u32() % 2 == 0 && vec->alen > aad_tail_size) { @@ -2207,6 +2209,9 @@ static void generate_aead_message(struct aead_request *req, /* Generate the AAD. */ generate_random_bytes((u8 *)vec->assoc, vec->alen); + if (suite->aad_iv && vec->alen >= ivsize) + /* Avoid implementation-defined behavior. */ + memcpy((u8 *)vec->assoc + vec->alen - ivsize, vec->iv, ivsize); if (inauthentic && prandom_u32() % 2 == 0) { /* Generate a random ciphertext. */ @@ -2242,7 +2247,7 @@ static void generate_aead_message(struct aead_request *req, * Mutate the authentic (ciphertext, AAD) pair to get an * inauthentic one. */ - mutate_aead_message(vec, suite->esp_aad); + mutate_aead_message(vec, suite->aad_iv, ivsize); } vec->novrfy = 1; if (suite->einval_allowed) @@ -5202,7 +5207,7 @@ static const struct alg_test_desc alg_test_descs[] = { .aead = { ____VECS(aes_gcm_rfc4106_tv_template), .einval_allowed = 1, - .esp_aad = 1, + .aad_iv = 1, } } }, { @@ -5214,7 +5219,7 @@ static const struct alg_test_desc alg_test_descs[] = { .aead = { ____VECS(aes_ccm_rfc4309_tv_template), .einval_allowed = 1, - .esp_aad = 1, + .aad_iv = 1, } } }, { @@ -5225,6 +5230,7 @@ static const struct alg_test_desc alg_test_descs[] = { .aead = { ____VECS(aes_gcm_rfc4543_tv_template), .einval_allowed = 1, + .aad_iv = 1, } } }, { @@ -5240,7 +5246,7 @@ static const struct alg_test_desc alg_test_descs[] = { .aead = { ____VECS(rfc7539esp_tv_template), .einval_allowed = 1, - .esp_aad = 1, + .aad_iv = 1, } } }, { From 8ff357a9d146b94d0a165449cad59adbe36f8b50 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 4 Mar 2020 14:44:04 -0800 Subject: [PATCH 119/162] crypto: testmgr - do comparison tests before inauthentic input tests Do test_aead_vs_generic_impl() before test_aead_inauthentic_inputs() so that any differences with the generic driver are detected before getting to the inauthentic input tests, which intentionally use only the driver being tested (so that they run even if a generic driver is unavailable). Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/testmgr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 0a10dbde27ef..428a5f8bc80f 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -2512,11 +2512,11 @@ static int test_aead_extra(const char *driver, goto out; } - err = test_aead_inauthentic_inputs(ctx); + err = test_aead_vs_generic_impl(ctx); if (err) goto out; - err = test_aead_vs_generic_impl(ctx); + err = test_aead_inauthentic_inputs(ctx); out: kfree(ctx->vec.key); kfree(ctx->vec.iv); From 3cd54a4c3c32f38fe0a57edeed10f95944dd0878 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 4 Mar 2020 14:44:05 -0800 Subject: [PATCH 120/162] crypto: aead - improve documentation for scatterlist layout Properly document the scatterlist layout for AEAD ciphers. 
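To make the documented layout concrete, a minimal in-place encryption call might look like the sketch below; ASSOCLEN, PTLEN and AUTHSIZE are placeholder constants, and the request/IV setup is omitted:

    /* one buffer holding AAD || plaintext || room for the tag */
    u8 buf[ASSOCLEN + PTLEN + AUTHSIZE];
    struct scatterlist sg;

    sg_init_one(&sg, buf, sizeof(buf));
    aead_request_set_ad(req, ASSOCLEN);
    /* same scatterlist as src and dst selects in-place operation */
    aead_request_set_crypt(req, &sg, &sg, PTLEN, iv);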
Reported-by: Gilad Ben-Yossef Cc: Stephan Mueller Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- include/crypto/aead.h | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/include/crypto/aead.h b/include/crypto/aead.h index 1b3ebe8593c0..62c68550aab6 100644 --- a/include/crypto/aead.h +++ b/include/crypto/aead.h @@ -43,27 +43,33 @@ * * Memory Structure: * - * To support the needs of the most prominent user of AEAD ciphers, namely - * IPSEC, the AEAD ciphers have a special memory layout the caller must adhere - * to. + * The source scatterlist must contain the concatenation of + * associated data || plaintext or ciphertext. * - * The scatter list pointing to the input data must contain: + * The destination scatterlist has the same layout, except that the plaintext + * (resp. ciphertext) will grow (resp. shrink) by the authentication tag size + * during encryption (resp. decryption). * - * * for RFC4106 ciphers, the concatenation of - * associated authentication data || IV || plaintext or ciphertext. Note, the - * same IV (buffer) is also set with the aead_request_set_crypt call. Note, - * the API call of aead_request_set_ad must provide the length of the AAD and - * the IV. The API call of aead_request_set_crypt only points to the size of - * the input plaintext or ciphertext. + * In-place encryption/decryption is enabled by using the same scatterlist + * pointer for both the source and destination. * - * * for "normal" AEAD ciphers, the concatenation of - * associated authentication data || plaintext or ciphertext. + * Even in the out-of-place case, space must be reserved in the destination for + * the associated data, even though it won't be written to. This makes the + * in-place and out-of-place cases more consistent. It is permissible for the + * "destination" associated data to alias the "source" associated data. * - * It is important to note that if multiple scatter gather list entries form - * the input data mentioned above, the first entry must not point to a NULL - * buffer. If there is any potential where the AAD buffer can be NULL, the - * calling code must contain a precaution to ensure that this does not result - * in the first scatter gather list entry pointing to a NULL buffer. + * As with the other scatterlist crypto APIs, zero-length scatterlist elements + * are not allowed in the used part of the scatterlist. Thus, if there is no + * associated data, the first element must point to the plaintext/ciphertext. + * + * To meet the needs of IPsec, a special quirk applies to rfc4106, rfc4309, + * rfc4543, and rfc7539esp ciphers. For these ciphers, the final 'ivsize' bytes + * of the associated data buffer must contain a second copy of the IV. This is + * in addition to the copy passed to aead_request_set_crypt(). These two IV + * copies must not differ; different implementations of the same algorithm may + * behave differently in that case. Note that the algorithm might not actually + * treat the IV as associated data; nevertheless the length passed to + * aead_request_set_ad() must include it. 
*/ struct crypto_aead; From 57ca81245f4db4a0222d545f8f5d4709544c26cf Mon Sep 17 00:00:00 2001 From: Shukun Tan Date: Thu, 5 Mar 2020 10:06:21 +0800 Subject: [PATCH 121/162] crypto: hisilicon - Use one workqueue per qm instead of per qp Testing showed that SEC does not need one workqueue per QP, so we use a single workqueue created by the QM device driver where necessary; this also reduces CPU waste without decreasing throughput. Signed-off-by: Shukun Tan Signed-off-by: Zaibo Xu Reviewed-by: Jonathan Cameron Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 39 ++++++++++++++--------------------- drivers/crypto/hisilicon/qm.h | 5 +++-- 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index ad7146a0c577..2fa1d1ae0984 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -494,17 +494,9 @@ static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm) } } -static void qm_qp_work_func(struct work_struct *work) +static void qm_work_process(struct work_struct *work) { - struct hisi_qp *qp; - - qp = container_of(work, struct hisi_qp, work); - qm_poll_qp(qp, qp->qm); -} - -static irqreturn_t qm_irq_handler(int irq, void *data) -{ - struct hisi_qm *qm = data; + struct hisi_qm *qm = container_of(work, struct hisi_qm, work); struct qm_eqe *eqe = qm->eqe + qm->status.eq_head; struct hisi_qp *qp; int eqe_num = 0; @@ -513,7 +505,7 @@ static irqreturn_t qm_irq_handler(int irq, void *data) eqe_num++; qp = qm_to_hisi_qp(qm, eqe); if (qp) - queue_work(qp->wq, &qp->work); + qm_poll_qp(qp, qm); if (qm->status.eq_head == QM_Q_DEPTH - 1) { qm->status.eqc_phase = !qm->status.eqc_phase; @@ -531,6 +523,17 @@ static irqreturn_t qm_irq_handler(int irq, void *data) } qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0); +} + +static irqreturn_t do_qm_irq(int irq, void *data) +{ + struct hisi_qm *qm = (struct hisi_qm *)data; + + /* the workqueue created by device driver of QM */ + if (qm->wq) + queue_work(qm->wq, &qm->work); + else + schedule_work(&qm->work); return IRQ_HANDLED; } @@ -540,7 +543,7 @@ static irqreturn_t qm_irq(int irq, void *data) struct hisi_qm *qm = data; if (readl(qm->io_base + QM_VF_EQ_INT_SOURCE)) - return qm_irq_handler(irq, data); + return do_qm_irq(irq, data); dev_err(&qm->pdev->dev, "invalid int source\n"); qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0); @@ -1159,20 +1162,9 @@ struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type) qp->qp_id = qp_id; qp->alg_type = alg_type; - INIT_WORK(&qp->work, qm_qp_work_func); - qp->wq = alloc_workqueue("hisi_qm", WQ_UNBOUND | WQ_HIGHPRI | - WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0); - if (!qp->wq) { - ret = -EFAULT; - goto err_free_qp_mem; - } return qp; -err_free_qp_mem: - if (qm->use_dma_api) - dma_free_coherent(dev, qp->qdma.size, qp->qdma.va, - qp->qdma.dma); err_clear_bit: write_lock(&qm->qps_lock); qm->qp_array[qp_id] = NULL; @@ -1704,6 +1696,7 @@ int hisi_qm_init(struct hisi_qm *qm) qm->qp_in_used = 0; mutex_init(&qm->mailbox_lock); rwlock_init(&qm->qps_lock); + INIT_WORK(&qm->work, qm_work_process); dev_dbg(dev, "init qm %s with %s\n", pdev->is_physfn ? "pf" : "vf", qm->use_dma_api ?
"dma api" : "iommu api"); diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index 1a4f208a9448..c72c2e64d04b 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -183,6 +183,9 @@ struct hisi_qm { u32 error_mask; u32 msi_mask; + struct workqueue_struct *wq; + struct work_struct work; + const char *algs; bool use_dma_api; bool use_sva; @@ -219,8 +222,6 @@ struct hisi_qp { void *qp_ctx; void (*req_cb)(struct hisi_qp *qp, void *data); void (*event_cb)(struct hisi_qp *qp); - struct work_struct work; - struct workqueue_struct *wq; struct hisi_qm *qm; u16 pasid; From a13c97118749954a9f47a3b5fc47457baf664f2d Mon Sep 17 00:00:00 2001 From: Ye Kai Date: Thu, 5 Mar 2020 10:06:22 +0800 Subject: [PATCH 122/162] crypto: hisilicon/sec2 - Add workqueue for SEC driver. Allocate one workqueue for each QM instead of one for all QMs, we found the throughput of SEC engine can be increased to the hardware limit throughput during testing sec2 performance. so we added this scheme. Signed-off-by: Ye Kai Signed-off-by: Longfang Liu Signed-off-by: Zaibo Xu Reviewed-by: Jonathan Cameron Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_main.c | 32 +++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 3767fdb64d98..1fe2558bc22e 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -774,12 +774,30 @@ static void sec_qm_uninit(struct hisi_qm *qm) static int sec_probe_init(struct hisi_qm *qm, struct sec_dev *sec) { + int ret; + + /* + * WQ_HIGHPRI: SEC request must be low delayed, + * so need a high priority workqueue. + * WQ_UNBOUND: SEC task is likely with long + * running CPU intensive workloads. + */ + qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | + WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus(), + pci_name(qm->pdev)); + if (!qm->wq) { + pci_err(qm->pdev, "fail to alloc workqueue\n"); + return -ENOMEM; + } + if (qm->fun_type == QM_HW_PF) { qm->qp_base = SEC_PF_DEF_Q_BASE; qm->qp_num = pf_q_num; qm->debug.curr_qm_qp_num = pf_q_num; - return sec_pf_probe_init(sec); + ret = sec_pf_probe_init(sec); + if (ret) + goto err_probe_uninit; } else if (qm->fun_type == QM_HW_VF) { /* * have no way to get qm configure in VM in v1 hardware, @@ -792,18 +810,26 @@ static int sec_probe_init(struct hisi_qm *qm, struct sec_dev *sec) qm->qp_num = SEC_QUEUE_NUM_V1 - SEC_PF_DEF_Q_NUM; } else if (qm->ver == QM_HW_V2) { /* v2 starts to support get vft by mailbox */ - return hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num); + ret = hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num); + if (ret) + goto err_probe_uninit; } } else { - return -ENODEV; + ret = -ENODEV; + goto err_probe_uninit; } return 0; +err_probe_uninit: + destroy_workqueue(qm->wq); + return ret; } static void sec_probe_uninit(struct hisi_qm *qm) { hisi_qm_dev_err_uninit(qm); + + destroy_workqueue(qm->wq); } static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) From 8824bc5ed1e7fbc23529c9b2e799bb9414cb6e52 Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Thu, 5 Mar 2020 10:06:23 +0800 Subject: [PATCH 123/162] crypto: hisilicon/sec2 - Add iommu status check In order to improve performance of small packets (<512Bytes) in SMMU translation scenario, we need to identify the type of IOMMU in the SEC probe to process small packets by a different method. 
Signed-off-by: Longfang Liu Signed-off-by: Zaibo Xu Reviewed-by: Jonathan Cameron Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec.h | 1 + drivers/crypto/hisilicon/sec2/sec_main.c | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index 13e2d8d7be94..eab0d22fc57e 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -165,6 +165,7 @@ struct sec_dev { struct list_head list; struct sec_debug debug; u32 ctx_q_num; + bool iommu_used; u32 num_vfs; unsigned long status; }; diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 1fe2558bc22e..4f354d79fa7e 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -832,6 +833,23 @@ static void sec_probe_uninit(struct hisi_qm *qm) destroy_workqueue(qm->wq); } +static void sec_iommu_used_check(struct sec_dev *sec) +{ + struct iommu_domain *domain; + struct device *dev = &sec->qm.pdev->dev; + + domain = iommu_get_domain_for_dev(dev); + + /* Check if iommu is used */ + sec->iommu_used = false; + if (domain) { + if (domain->type & __IOMMU_DOMAIN_PAGING) + sec->iommu_used = true; + dev_info(dev, "SMMU Opened, the iommu type = %u\n", + domain->type); + } +} + static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct sec_dev *sec; @@ -845,6 +863,7 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, sec); sec->ctx_q_num = ctx_q_num; + sec_iommu_used_check(sec); qm = &sec->qm; From 2514f5595db7287b0708a21ee40a3278dade69dc Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Thu, 5 Mar 2020 10:06:24 +0800 Subject: [PATCH 124/162] crypto: hisilicon/sec2 - Update IV and MAC operation We have updated how the IV and MAC addresses are handled, in preparation for the pbuf patch, and fixed the coding style.
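The heart of the rework: each request caches its own IV and MAC addresses at map time, so later code no longer reaches back into qp_ctx->res[req_id]. Condensed from the diff that follows:

    c_req->c_ivin = res->c_ivin;
    c_req->c_ivin_dma = res->c_ivin_dma;
    if (ctx->alg_type == SEC_AEAD) {
        a_req->out_mac = res->out_mac;
        a_req->out_mac_dma = res->out_mac_dma;
    }

    /* the BD fill then uses the cached DMA addresses directly */
    sec_sqe->type2.c_ivin_addr = cpu_to_le64(c_req->c_ivin_dma);
    sec_sqe->type2.mac_addr = cpu_to_le64(a_req->out_mac_dma);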
Signed-off-by: Longfang Liu Signed-off-by: Zaibo Xu Reviewed-by: Jonathan Cameron Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec.h | 2 + drivers/crypto/hisilicon/sec2/sec_crypto.c | 75 ++++++++++++---------- 2 files changed, 44 insertions(+), 33 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index eab0d22fc57e..e67b416a527d 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -23,6 +23,8 @@ struct sec_cipher_req { dma_addr_t c_in_dma; struct hisi_acc_hw_sgl *c_out; dma_addr_t c_out_dma; + u8 *c_ivin; + dma_addr_t c_ivin_dma; struct skcipher_request *sk_req; u32 c_len; bool encrypt; diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index acd15507eb8a..1eeaa746f63c 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -46,6 +46,7 @@ #define SEC_CIPHER_AUTH 0xfe #define SEC_AUTH_CIPHER 0x1 #define SEC_MAX_MAC_LEN 64 +#define SEC_MAX_AAD_LEN 65535 #define SEC_TOTAL_MAC_SZ (SEC_MAX_MAC_LEN * QM_Q_DEPTH) #define SEC_SQE_LEN_RATE 4 #define SEC_SQE_CFLAG 2 @@ -110,12 +111,12 @@ static void sec_free_req_id(struct sec_req *req) mutex_unlock(&qp_ctx->req_lock); } -static int sec_aead_verify(struct sec_req *req, struct sec_qp_ctx *qp_ctx) +static int sec_aead_verify(struct sec_req *req) { struct aead_request *aead_req = req->aead_req.aead_req; struct crypto_aead *tfm = crypto_aead_reqtfm(aead_req); - u8 *mac_out = qp_ctx->res[req->req_id].out_mac; size_t authsize = crypto_aead_authsize(tfm); + u8 *mac_out = req->aead_req.out_mac; u8 *mac = mac_out + SEC_MAX_MAC_LEN; struct scatterlist *sgl = aead_req->src; size_t sz; @@ -163,7 +164,7 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp) } if (ctx->alg_type == SEC_AEAD && !req->c_req.encrypt) - err = sec_aead_verify(req, qp_ctx); + err = sec_aead_verify(req); atomic64_inc(&ctx->sec->debug.dfx.recv_cnt); @@ -259,11 +260,11 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx, if (ctx->alg_type == SEC_AEAD) { ret = sec_alloc_mac_resource(dev, res); if (ret) - goto get_fail; + goto alloc_fail; } return 0; -get_fail: +alloc_fail: sec_free_civ_resource(dev, res); return ret; @@ -590,11 +591,21 @@ GEN_SEC_SETKEY_FUNC(3des_cbc, SEC_CALG_3DES, SEC_CMODE_CBC) GEN_SEC_SETKEY_FUNC(sm4_xts, SEC_CALG_SM4, SEC_CMODE_XTS) GEN_SEC_SETKEY_FUNC(sm4_cbc, SEC_CALG_SM4, SEC_CMODE_CBC) -static int sec_cipher_map(struct device *dev, struct sec_req *req, +static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, struct scatterlist *src, struct scatterlist *dst) { struct sec_cipher_req *c_req = &req->c_req; + struct sec_aead_req *a_req = &req->aead_req; struct sec_qp_ctx *qp_ctx = req->qp_ctx; + struct sec_alg_res *res = &qp_ctx->res[req->req_id]; + struct device *dev = SEC_CTX_DEV(ctx); + + c_req->c_ivin = res->c_ivin; + c_req->c_ivin_dma = res->c_ivin_dma; + if (ctx->alg_type == SEC_AEAD) { + a_req->out_mac = res->out_mac; + a_req->out_mac_dma = res->out_mac_dma; + } c_req->c_in = hisi_acc_sg_buf_map_to_hw_sgl(dev, src, qp_ctx->c_in_pool, @@ -625,29 +636,30 @@ static int sec_cipher_map(struct device *dev, struct sec_req *req, return 0; } -static void sec_cipher_unmap(struct device *dev, struct sec_cipher_req *req, +static void sec_cipher_unmap(struct sec_ctx *ctx, struct sec_req *req, struct scatterlist *src, struct scatterlist *dst) { - if (dst != src) - hisi_acc_sg_buf_unmap(dev, src, req->c_in); + struct sec_cipher_req *c_req = &req->c_req; + 
struct device *dev = SEC_CTX_DEV(ctx); - hisi_acc_sg_buf_unmap(dev, dst, req->c_out); + if (dst != src) + hisi_acc_sg_buf_unmap(dev, src, c_req->c_in); + + hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out); } static int sec_skcipher_sgl_map(struct sec_ctx *ctx, struct sec_req *req) { struct skcipher_request *sq = req->c_req.sk_req; - return sec_cipher_map(SEC_CTX_DEV(ctx), req, sq->src, sq->dst); + return sec_cipher_map(ctx, req, sq->src, sq->dst); } static void sec_skcipher_sgl_unmap(struct sec_ctx *ctx, struct sec_req *req) { - struct device *dev = SEC_CTX_DEV(ctx); - struct sec_cipher_req *c_req = &req->c_req; - struct skcipher_request *sk_req = c_req->sk_req; + struct skcipher_request *sq = req->c_req.sk_req; - sec_cipher_unmap(dev, c_req, sk_req->src, sk_req->dst); + sec_cipher_unmap(ctx, req, sq->src, sq->dst); } static int sec_aead_aes_set_key(struct sec_cipher_ctx *c_ctx, @@ -758,16 +770,14 @@ static int sec_aead_sgl_map(struct sec_ctx *ctx, struct sec_req *req) { struct aead_request *aq = req->aead_req.aead_req; - return sec_cipher_map(SEC_CTX_DEV(ctx), req, aq->src, aq->dst); + return sec_cipher_map(ctx, req, aq->src, aq->dst); } static void sec_aead_sgl_unmap(struct sec_ctx *ctx, struct sec_req *req) { - struct device *dev = SEC_CTX_DEV(ctx); - struct sec_cipher_req *cq = &req->c_req; struct aead_request *aq = req->aead_req.aead_req; - sec_cipher_unmap(dev, cq, aq->src, aq->dst); + sec_cipher_unmap(ctx, req, aq->src, aq->dst); } static int sec_request_transfer(struct sec_ctx *ctx, struct sec_req *req) @@ -800,9 +810,9 @@ static void sec_request_untransfer(struct sec_ctx *ctx, struct sec_req *req) static void sec_skcipher_copy_iv(struct sec_ctx *ctx, struct sec_req *req) { struct skcipher_request *sk_req = req->c_req.sk_req; - u8 *c_ivin = req->qp_ctx->res[req->req_id].c_ivin; + struct sec_cipher_req *c_req = &req->c_req; - memcpy(c_ivin, sk_req->iv, ctx->c_ctx.ivsize); + memcpy(c_req->c_ivin, sk_req->iv, ctx->c_ctx.ivsize); } static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) @@ -817,8 +827,7 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) memset(sec_sqe, 0, sizeof(struct sec_sqe)); sec_sqe->type2.c_key_addr = cpu_to_le64(c_ctx->c_key_dma); - sec_sqe->type2.c_ivin_addr = - cpu_to_le64(req->qp_ctx->res[req->req_id].c_ivin_dma); + sec_sqe->type2.c_ivin_addr = cpu_to_le64(c_req->c_ivin_dma); sec_sqe->type2.data_src_addr = cpu_to_le64(c_req->c_in_dma); sec_sqe->type2.data_dst_addr = cpu_to_le64(c_req->c_out_dma); @@ -903,9 +912,9 @@ static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req, static void sec_aead_copy_iv(struct sec_ctx *ctx, struct sec_req *req) { struct aead_request *aead_req = req->aead_req.aead_req; - u8 *c_ivin = req->qp_ctx->res[req->req_id].c_ivin; + struct sec_cipher_req *c_req = &req->c_req; - memcpy(c_ivin, aead_req->iv, ctx->c_ctx.ivsize); + memcpy(c_req->c_ivin, aead_req->iv, ctx->c_ctx.ivsize); } static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir, @@ -938,8 +947,7 @@ static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir, sec_sqe->type2.cipher_src_offset = cpu_to_le16((u16)aq->assoclen); - sec_sqe->type2.mac_addr = - cpu_to_le64(req->qp_ctx->res[req->req_id].out_mac_dma); + sec_sqe->type2.mac_addr = cpu_to_le64(a_req->out_mac_dma); } static int sec_aead_bd_fill(struct sec_ctx *ctx, struct sec_req *req) @@ -963,6 +971,7 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err) { struct aead_request *a_req = req->aead_req.aead_req; struct 
crypto_aead *tfm = crypto_aead_reqtfm(a_req); + struct sec_aead_req *aead_req = &req->aead_req; struct sec_cipher_req *c_req = &req->c_req; size_t authsize = crypto_aead_authsize(tfm); struct sec_qp_ctx *qp_ctx = req->qp_ctx; @@ -978,7 +987,7 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err) struct scatterlist *sgl = a_req->dst; sz = sg_pcopy_from_buffer(sgl, sg_nents(sgl), - qp_ctx->res[req->req_id].out_mac, + aead_req->out_mac, authsize, a_req->cryptlen + a_req->assoclen); @@ -1030,6 +1039,7 @@ static int sec_request_init(struct sec_ctx *ctx, struct sec_req *req) static int sec_process(struct sec_ctx *ctx, struct sec_req *req) { + struct sec_cipher_req *c_req = &req->c_req; int ret; ret = sec_request_init(ctx, req); @@ -1056,12 +1066,10 @@ err_send_req: /* As failing, restore the IV from user */ if (ctx->c_ctx.c_mode == SEC_CMODE_CBC && !req->c_req.encrypt) { if (ctx->alg_type == SEC_SKCIPHER) - memcpy(req->c_req.sk_req->iv, - req->qp_ctx->res[req->req_id].c_ivin, + memcpy(req->c_req.sk_req->iv, c_req->c_ivin, ctx->c_ctx.ivsize); else - memcpy(req->aead_req.aead_req->iv, - req->qp_ctx->res[req->req_id].c_ivin, + memcpy(req->aead_req.aead_req->iv, c_req->c_ivin, ctx->c_ctx.ivsize); } @@ -1320,7 +1328,8 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq) struct crypto_aead *tfm = crypto_aead_reqtfm(req); size_t authsize = crypto_aead_authsize(tfm); - if (unlikely(!req->src || !req->dst || !req->cryptlen)) { + if (unlikely(!req->src || !req->dst || !req->cryptlen || + req->assoclen > SEC_MAX_AAD_LEN)) { dev_err(SEC_CTX_DEV(ctx), "aead input param error!\n"); return -EINVAL; } From 74b58db8b7edfadff4a1ed2eb195ab45570a33a9 Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Thu, 5 Mar 2020 10:06:25 +0800 Subject: [PATCH 125/162] crypto: hisilicon/sec2 - Add pbuffer mode for SEC driver In the scenario of SMMU translation, the SEC performance of short messages (<512Bytes) cannot meet our expectations. To avoid this, we reserve the plat buffer (PBUF) memory for small packets when creating TFM. 
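In brief, each page is carved into fixed-size slots holding data, IV and two MACs, and a request takes the pbuf path only when its whole payload fits in one slot. A condensed view of the sizing and the per-request decision, taken from the hunks below:

    #define SEC_PBUF_SZ   512
    #define SEC_PBUF_PKG  (SEC_PBUF_SZ + SEC_IV_SIZE + SEC_MAX_MAC_LEN * 2)

    /* skcipher case; the AEAD check adds assoclen to the length */
    sreq->use_pbuf = ctx->pbuf_supported &&
                     sk_req->cryptlen <= SEC_PBUF_SZ;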
Signed-off-by: Longfang Liu Signed-off-by: Zaibo Xu Reviewed-by: Jonathan Cameron Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec.h | 4 + drivers/crypto/hisilicon/sec2/sec_crypto.c | 173 ++++++++++++++++++++- 2 files changed, 172 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index e67b416a527d..a73d82c9e180 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -11,6 +11,8 @@ /* Algorithm resource per hardware SEC queue */ struct sec_alg_res { + u8 *pbuf; + dma_addr_t pbuf_dma; u8 *c_ivin; dma_addr_t c_ivin_dma; u8 *out_mac; @@ -50,6 +52,7 @@ struct sec_req { /* Status of the SEC request */ bool fake_busy; + bool use_pbuf; }; /** @@ -130,6 +133,7 @@ struct sec_ctx { atomic_t dec_qcyclic; enum sec_alg_type alg_type; + bool pbuf_supported; struct sec_cipher_ctx c_ctx; struct sec_auth_ctx a_ctx; }; diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 1eeaa746f63c..b2557431feb4 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -48,6 +48,19 @@ #define SEC_MAX_MAC_LEN 64 #define SEC_MAX_AAD_LEN 65535 #define SEC_TOTAL_MAC_SZ (SEC_MAX_MAC_LEN * QM_Q_DEPTH) + +#define SEC_PBUF_SZ 512 +#define SEC_PBUF_IV_OFFSET SEC_PBUF_SZ +#define SEC_PBUF_MAC_OFFSET (SEC_PBUF_SZ + SEC_IV_SIZE) +#define SEC_PBUF_PKG (SEC_PBUF_SZ + SEC_IV_SIZE + \ + SEC_MAX_MAC_LEN * 2) +#define SEC_PBUF_NUM (PAGE_SIZE / SEC_PBUF_PKG) +#define SEC_PBUF_PAGE_NUM (QM_Q_DEPTH / SEC_PBUF_NUM) +#define SEC_PBUF_LEFT_SZ (SEC_PBUF_PKG * (QM_Q_DEPTH - \ + SEC_PBUF_PAGE_NUM * SEC_PBUF_NUM)) +#define SEC_TOTAL_PBUF_SZ (PAGE_SIZE * SEC_PBUF_PAGE_NUM + \ + SEC_PBUF_LEFT_SZ) + #define SEC_SQE_LEN_RATE 4 #define SEC_SQE_CFLAG 2 #define SEC_SQE_AEAD_FLAG 3 @@ -246,6 +259,50 @@ static void sec_free_mac_resource(struct device *dev, struct sec_alg_res *res) res->out_mac, res->out_mac_dma); } +static void sec_free_pbuf_resource(struct device *dev, struct sec_alg_res *res) +{ + if (res->pbuf) + dma_free_coherent(dev, SEC_TOTAL_PBUF_SZ, + res->pbuf, res->pbuf_dma); +} + +/* + * To improve performance, pbuffer is used for + * small packets (< 512Bytes) as IOMMU translation using. 
+ */ +static int sec_alloc_pbuf_resource(struct device *dev, struct sec_alg_res *res) +{ + int pbuf_page_offset; + int i, j, k; + + res->pbuf = dma_alloc_coherent(dev, SEC_TOTAL_PBUF_SZ, + &res->pbuf_dma, GFP_KERNEL); + if (!res->pbuf) + return -ENOMEM; + + /* + * SEC_PBUF_PKG contains data pbuf, iv and + * out_mac : + * Every PAGE contains six SEC_PBUF_PKG + * The sec_qp_ctx contains QM_Q_DEPTH numbers of SEC_PBUF_PKG + * So we need SEC_PBUF_PAGE_NUM numbers of PAGE + * for the SEC_TOTAL_PBUF_SZ + */ + for (i = 0; i <= SEC_PBUF_PAGE_NUM; i++) { + pbuf_page_offset = PAGE_SIZE * i; + for (j = 0; j < SEC_PBUF_NUM; j++) { + k = i * SEC_PBUF_NUM + j; + if (k == QM_Q_DEPTH) + break; + res[k].pbuf = res->pbuf + + j * SEC_PBUF_PKG + pbuf_page_offset; + res[k].pbuf_dma = res->pbuf_dma + + j * SEC_PBUF_PKG + pbuf_page_offset; + } + } + return 0; +} + static int sec_alg_resource_alloc(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ctx) { @@ -262,6 +319,13 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx, if (ret) goto alloc_fail; } + if (ctx->pbuf_supported) { + ret = sec_alloc_pbuf_resource(dev, res); + if (ret) { + dev_err(dev, "fail to alloc pbuf dma resource!\n"); + goto alloc_fail; + } + } return 0; alloc_fail: @@ -277,6 +341,8 @@ static void sec_alg_resource_free(struct sec_ctx *ctx, sec_free_civ_resource(dev, qp_ctx->res); + if (ctx->pbuf_supported) + sec_free_pbuf_resource(dev, qp_ctx->res); if (ctx->alg_type == SEC_AEAD) sec_free_mac_resource(dev, qp_ctx->res); } @@ -369,6 +435,8 @@ static int sec_ctx_base_init(struct sec_ctx *ctx) ctx->sec = sec; ctx->hlf_q_num = sec->ctx_q_num >> 1; + ctx->pbuf_supported = ctx->sec->iommu_used; + /* Half of queue depth is taken as fake requests limit in the queue. */ ctx->fake_req_limit = QM_Q_DEPTH >> 1; ctx->qp_ctx = kcalloc(sec->ctx_q_num, sizeof(struct sec_qp_ctx), @@ -591,6 +659,66 @@ GEN_SEC_SETKEY_FUNC(3des_cbc, SEC_CALG_3DES, SEC_CMODE_CBC) GEN_SEC_SETKEY_FUNC(sm4_xts, SEC_CALG_SM4, SEC_CMODE_XTS) GEN_SEC_SETKEY_FUNC(sm4_cbc, SEC_CALG_SM4, SEC_CMODE_CBC) +static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req, + struct scatterlist *src) +{ + struct aead_request *aead_req = req->aead_req.aead_req; + struct sec_cipher_req *c_req = &req->c_req; + struct sec_qp_ctx *qp_ctx = req->qp_ctx; + struct device *dev = SEC_CTX_DEV(ctx); + int copy_size, pbuf_length; + int req_id = req->req_id; + + if (ctx->alg_type == SEC_AEAD) + copy_size = aead_req->cryptlen + aead_req->assoclen; + else + copy_size = c_req->c_len; + + pbuf_length = sg_copy_to_buffer(src, sg_nents(src), + qp_ctx->res[req_id].pbuf, + copy_size); + + if (unlikely(pbuf_length != copy_size)) { + dev_err(dev, "copy src data to pbuf error!\n"); + return -EINVAL; + } + + c_req->c_in_dma = qp_ctx->res[req_id].pbuf_dma; + + if (!c_req->c_in_dma) { + dev_err(dev, "fail to set pbuffer address!\n"); + return -ENOMEM; + } + + c_req->c_out_dma = c_req->c_in_dma; + + return 0; +} + +static void sec_cipher_pbuf_unmap(struct sec_ctx *ctx, struct sec_req *req, + struct scatterlist *dst) +{ + struct aead_request *aead_req = req->aead_req.aead_req; + struct sec_cipher_req *c_req = &req->c_req; + struct sec_qp_ctx *qp_ctx = req->qp_ctx; + struct device *dev = SEC_CTX_DEV(ctx); + int copy_size, pbuf_length; + int req_id = req->req_id; + + if (ctx->alg_type == SEC_AEAD) + copy_size = c_req->c_len + aead_req->assoclen; + else + copy_size = c_req->c_len; + + pbuf_length = sg_copy_from_buffer(dst, sg_nents(dst), + qp_ctx->res[req_id].pbuf, + copy_size); + + if (unlikely(pbuf_length != copy_size)) 
+ dev_err(dev, "copy pbuf data to dst error!\n"); + +} + static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, struct scatterlist *src, struct scatterlist *dst) { @@ -599,7 +727,20 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, struct sec_qp_ctx *qp_ctx = req->qp_ctx; struct sec_alg_res *res = &qp_ctx->res[req->req_id]; struct device *dev = SEC_CTX_DEV(ctx); + int ret; + if (req->use_pbuf) { + ret = sec_cipher_pbuf_map(ctx, req, src); + c_req->c_ivin = res->pbuf + SEC_PBUF_IV_OFFSET; + c_req->c_ivin_dma = res->pbuf_dma + SEC_PBUF_IV_OFFSET; + if (ctx->alg_type == SEC_AEAD) { + a_req->out_mac = res->pbuf + SEC_PBUF_MAC_OFFSET; + a_req->out_mac_dma = res->pbuf_dma + + SEC_PBUF_MAC_OFFSET; + } + + return ret; + } c_req->c_ivin = res->c_ivin; c_req->c_ivin_dma = res->c_ivin_dma; if (ctx->alg_type == SEC_AEAD) { @@ -642,10 +783,14 @@ static void sec_cipher_unmap(struct sec_ctx *ctx, struct sec_req *req, struct sec_cipher_req *c_req = &req->c_req; struct device *dev = SEC_CTX_DEV(ctx); - if (dst != src) - hisi_acc_sg_buf_unmap(dev, src, c_req->c_in); + if (req->use_pbuf) { + sec_cipher_pbuf_unmap(ctx, req, dst); + } else { + if (dst != src) + hisi_acc_sg_buf_unmap(dev, src, c_req->c_in); - hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out); + hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out); + } } static int sec_skcipher_sgl_map(struct sec_ctx *ctx, struct sec_req *req) @@ -844,7 +989,10 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) cipher = SEC_CIPHER_DEC << SEC_CIPHER_OFFSET; sec_sqe->type_cipher_auth = bd_type | cipher; - sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET; + if (req->use_pbuf) + sa_type = SEC_PBUF << SEC_SRC_SGL_OFFSET; + else + sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET; scene = SEC_COMM_SCENE << SEC_SCENE_OFFSET; if (c_req->c_in_dma != c_req->c_out_dma) de = 0x1 << SEC_DE_OFFSET; @@ -852,7 +1000,10 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) sec_sqe->sds_sa_type = (de | scene | sa_type); /* Just set DST address type */ - da_type = SEC_SGL << SEC_DST_SGL_OFFSET; + if (req->use_pbuf) + da_type = SEC_PBUF << SEC_DST_SGL_OFFSET; + else + da_type = SEC_SGL << SEC_DST_SGL_OFFSET; sec_sqe->sdm_addr_type |= da_type; sec_sqe->type2.clen_ivhlen |= cpu_to_le32(c_req->c_len); @@ -1215,6 +1366,12 @@ static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq) return -EINVAL; } sreq->c_req.c_len = sk_req->cryptlen; + + if (ctx->pbuf_supported && sk_req->cryptlen <= SEC_PBUF_SZ) + sreq->use_pbuf = true; + else + sreq->use_pbuf = false; + if (c_alg == SEC_CALG_3DES) { if (unlikely(sk_req->cryptlen & (DES3_EDE_BLOCK_SIZE - 1))) { dev_err(dev, "skcipher 3des input length error!\n"); @@ -1334,6 +1491,12 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq) return -EINVAL; } + if (ctx->pbuf_supported && (req->cryptlen + req->assoclen) <= + SEC_PBUF_SZ) + sreq->use_pbuf = true; + else + sreq->use_pbuf = false; + /* Support AES only */ if (unlikely(c_alg != SEC_CALG_AES)) { dev_err(SEC_CTX_DEV(ctx), "aead crypto alg error!\n"); From ace74b627c88ea96d4fe5811a5128475aa6a49a6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 4 Mar 2020 22:21:48 -0800 Subject: [PATCH 126/162] hwrng: ks-sa - move TI Keystone driver into the config menu structure Move the TI Keystone hardware random number generator into the same menu as all of the other hardware random number generators. This makes the driver config be listed in the correct place in the kconfig tools. 
Fixes: eb428ee0e3ca ("hwrng: ks-sa - add hw_random driver") Signed-off-by: Randy Dunlap Cc: Vitaly Andrianov Cc: Tero Kristo Cc: Murali Karicheri Cc: Herbert Xu Cc: Matt Mackall Cc: linux-crypto@vger.kernel.org Signed-off-by: Herbert Xu --- drivers/char/hw_random/Kconfig | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index ddbfa190caf4..9bc46da8d77a 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -467,6 +467,13 @@ config HW_RANDOM_NPCM If unsure, say Y. +config HW_RANDOM_KEYSTONE + depends on ARCH_KEYSTONE || COMPILE_TEST + default HW_RANDOM + tristate "TI Keystone NETCP SA Hardware random number generator" + help + This option enables Keystone's hardware random generator. + endif # HW_RANDOM config UML_RANDOM @@ -483,10 +490,3 @@ config UML_RANDOM (check your distro, or download from http://sourceforge.net/projects/gkernel/). rngd periodically reads /dev/hwrng and injects the entropy into /dev/random. - -config HW_RANDOM_KEYSTONE - depends on ARCH_KEYSTONE || COMPILE_TEST - default HW_RANDOM - tristate "TI Keystone NETCP SA Hardware random number generator" - help - This option enables Keystone's hardware random generator. From 47a1f8e8b3637ff5f7806587883d7d94068d9ee8 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Thu, 5 Mar 2020 21:58:20 +0100 Subject: [PATCH 127/162] hwrng: imx-rngc - fix an error path Make sure that the rngc interrupt is masked if the rngc self test fails. Self test failure means that probe fails as well. Interrupts should be masked in this case, regardless of the error. Cc: stable@vger.kernel.org Fixes: 1d5449445bd0 ("hwrng: mx-rngc - add a driver for Freescale RNGC") Reviewed-by: PrasannaKumar Muralidharan Signed-off-by: Martin Kaiser Signed-off-by: Herbert Xu --- drivers/char/hw_random/imx-rngc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c index 30cf00f8e9a0..0576801944fd 100644 --- a/drivers/char/hw_random/imx-rngc.c +++ b/drivers/char/hw_random/imx-rngc.c @@ -105,8 +105,10 @@ static int imx_rngc_self_test(struct imx_rngc *rngc) return -ETIMEDOUT; } - if (rngc->err_reg != 0) + if (rngc->err_reg != 0) { + imx_rngc_irq_mask_clear(rngc); return -EIO; + } return 0; } From 3acd9ea9331cd56f9769d16211c865d4638fd12d Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Thu, 5 Mar 2020 21:58:21 +0100 Subject: [PATCH 128/162] hwrng: imx-rngc - use automatic seeding The rngc requires a new seed for its prng after generating 2^20 160-bit words of random data. At the moment, we seed the prng only once during initialisation. Set the rngc to auto seed mode so that it kicks off the internal reseeding operation when a new seed is required. Keep the manual calculation of the initial seed when the device is probed and switch to automatic seeding afterwards.
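(For scale: 2^20 words x 160 bits/word is 2^20 x 20 bytes, i.e. 20 MiB of prng output between reseeds.)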
Reviewed-by: PrasannaKumar Muralidharan Signed-off-by: Martin Kaiser Signed-off-by: Herbert Xu --- drivers/char/hw_random/imx-rngc.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c index 0576801944fd..903894518c8d 100644 --- a/drivers/char/hw_random/imx-rngc.c +++ b/drivers/char/hw_random/imx-rngc.c @@ -31,6 +31,7 @@ #define RNGC_CTRL_MASK_ERROR 0x00000040 #define RNGC_CTRL_MASK_DONE 0x00000020 +#define RNGC_CTRL_AUTO_SEED 0x00000010 #define RNGC_STATUS_ERROR 0x00010000 #define RNGC_STATUS_FIFO_LEVEL_MASK 0x00000f00 @@ -167,7 +168,7 @@ static irqreturn_t imx_rngc_irq(int irq, void *priv) static int imx_rngc_init(struct hwrng *rng) { struct imx_rngc *rngc = container_of(rng, struct imx_rngc, rng); - u32 cmd; + u32 cmd, ctrl; int ret; /* clear error */ @@ -192,7 +193,18 @@ static int imx_rngc_init(struct hwrng *rng) } while (rngc->err_reg == RNGC_ERROR_STATUS_STAT_ERR); - return rngc->err_reg ? -EIO : 0; + if (rngc->err_reg) + return -EIO; + + /* + * enable automatic seeding, the rngc creates a new seed automatically + * after serving 2^20 random 160-bit words + */ + ctrl = readl(rngc->base + RNGC_CONTROL); + ctrl |= RNGC_CTRL_AUTO_SEED; + writel(ctrl, rngc->base + RNGC_CONTROL); + + return 0; } static int imx_rngc_probe(struct platform_device *pdev) From f7d79450d192d194a88c37b55c048d3840a5d74b Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Thu, 5 Mar 2020 21:58:22 +0100 Subject: [PATCH 129/162] hwrng: imx-rngc - (trivial) simplify error prints Remove the device name; it is added by the dev_...() routines. Drop the error code as well. It will be shown by the driver core when the probe operation fails. Reviewed-by: PrasannaKumar Muralidharan Signed-off-by: Martin Kaiser Signed-off-by: Herbert Xu --- drivers/char/hw_random/imx-rngc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c index 903894518c8d..92e93abcc9cc 100644 --- a/drivers/char/hw_random/imx-rngc.c +++ b/drivers/char/hw_random/imx-rngc.c @@ -258,14 +258,14 @@ static int imx_rngc_probe(struct platform_device *pdev) if (self_test) { ret = imx_rngc_self_test(rngc); if (ret) { - dev_err(rngc->dev, "FSL RNGC self test failed.\n"); + dev_err(rngc->dev, "self test failed\n"); goto err; } } ret = hwrng_register(&rngc->rng); if (ret) { - dev_err(&pdev->dev, "FSL RNGC registering failed (%d)\n", ret); + dev_err(&pdev->dev, "hwrng registration failed\n"); goto err; } From b9957fc944586da3d4a175c811afec563636c713 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Thu, 5 Mar 2020 21:58:23 +0100 Subject: [PATCH 130/162] hwrng: imx-rngc - check the rng type Read the rng type and hardware revision during probe. Fail the probe operation if the type is not one of rngc or rngb. (There's also an rnga type, which needs a different driver.) Display the type and revision in a debug print if probe was successful.
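For illustration only, the ver id layout described above could be decoded by a helper along these lines (a sketch, not part of this patch; the shifts and type values are the ones added below, and the helper name is made up):

	/* sketch: decode the RNGC_VER_ID layout described above */
	static bool rngc_type_is_supported(u32 ver_id)
	{
		u8 rng_type = ver_id >> RNGC_TYPE_SHIFT;	/* bits 31..28 */

		/* major revision in bits 15..8, minor revision in bits 7..0 */
		pr_debug("rng type %#x, revision %d.%02d\n", rng_type,
			 (ver_id >> RNGC_VER_MAJ_SHIFT) & 0xff, ver_id & 0xff);

		return rng_type == RNGC_TYPE_RNGB || rng_type == RNGC_TYPE_RNGC;
	}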
Reviewed-by: PrasannaKumar Muralidharan Signed-off-by: Martin Kaiser Signed-off-by: Herbert Xu --- drivers/char/hw_random/imx-rngc.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c index 92e93abcc9cc..50a8923d829a 100644 --- a/drivers/char/hw_random/imx-rngc.c +++ b/drivers/char/hw_random/imx-rngc.c @@ -18,12 +18,22 @@ #include #include +#define RNGC_VER_ID 0x0000 #define RNGC_COMMAND 0x0004 #define RNGC_CONTROL 0x0008 #define RNGC_STATUS 0x000C #define RNGC_ERROR 0x0010 #define RNGC_FIFO 0x0014 +/* the fields in the ver id register */ +#define RNGC_TYPE_SHIFT 28 +#define RNGC_VER_MAJ_SHIFT 8 + +/* the rng_type field */ +#define RNGC_TYPE_RNGB 0x1 +#define RNGC_TYPE_RNGC 0x2 + + #define RNGC_CMD_CLR_ERR 0x00000020 #define RNGC_CMD_CLR_INT 0x00000010 #define RNGC_CMD_SEED 0x00000002 @@ -212,6 +222,8 @@ static int imx_rngc_probe(struct platform_device *pdev) struct imx_rngc *rngc; int ret; int irq; + u32 ver_id; + u8 rng_type; rngc = devm_kzalloc(&pdev->dev, sizeof(*rngc), GFP_KERNEL); if (!rngc) @@ -237,6 +249,17 @@ static int imx_rngc_probe(struct platform_device *pdev) if (ret) return ret; + ver_id = readl(rngc->base + RNGC_VER_ID); + rng_type = ver_id >> RNGC_TYPE_SHIFT; + /* + * This driver supports only RNGC and RNGB. (There's a different + * driver for RNGA.) + */ + if (rng_type != RNGC_TYPE_RNGC && rng_type != RNGC_TYPE_RNGB) { + ret = -ENODEV; + goto err; + } + ret = devm_request_irq(&pdev->dev, irq, imx_rngc_irq, 0, pdev->name, (void *)rngc); if (ret) { @@ -269,7 +292,10 @@ static int imx_rngc_probe(struct platform_device *pdev) goto err; } - dev_info(&pdev->dev, "Freescale RNGC registered.\n"); + dev_info(&pdev->dev, + "Freescale RNG%c registered (HW revision %d.%02d)\n", + rng_type == RNGC_TYPE_RNGB ? 'B' : 'C', + (ver_id >> RNGC_VER_MAJ_SHIFT) & 0xff, ver_id & 0xff); return 0; err: From f086fd1e43443dd3c3fd26d3d1a59e52fc4bf29b Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Thu, 5 Mar 2020 21:58:24 +0100 Subject: [PATCH 131/162] hwrng: imx-rngc - simplify interrupt mask/unmask Use a simpler approach for masking / unmasking the rngc interrupt: The interrupt is unmasked while self-test is running and when the rngc driver is used by the hwrng core. Mask the interrupt again when self test is finished, regardless of self test success or failure. Unmask the interrupt in the init function. Add a cleanup function where the rngc interrupt is masked again. Signed-off-by: Martin Kaiser Signed-off-by: Herbert Xu --- drivers/char/hw_random/imx-rngc.c | 43 ++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c index 50a8923d829a..9c47e431ce90 100644 --- a/drivers/char/hw_random/imx-rngc.c +++ b/drivers/char/hw_random/imx-rngc.c @@ -111,17 +111,11 @@ static int imx_rngc_self_test(struct imx_rngc *rngc) writel(cmd | RNGC_CMD_SELF_TEST, rngc->base + RNGC_COMMAND); ret = wait_for_completion_timeout(&rngc->rng_op_done, RNGC_TIMEOUT); - if (!ret) { - imx_rngc_irq_mask_clear(rngc); + imx_rngc_irq_mask_clear(rngc); + if (!ret) return -ETIMEDOUT; - } - if (rngc->err_reg != 0) { - imx_rngc_irq_mask_clear(rngc); - return -EIO; - } - - return 0; + return rngc->err_reg ? 
-EIO : 0; } static int imx_rngc_read(struct hwrng *rng, void *data, size_t max, bool wait) @@ -185,10 +179,10 @@ static int imx_rngc_init(struct hwrng *rng) cmd = readl(rngc->base + RNGC_COMMAND); writel(cmd | RNGC_CMD_CLR_ERR, rngc->base + RNGC_COMMAND); + imx_rngc_irq_unmask(rngc); + /* create seed, repeat while there is some statistical error */ do { - imx_rngc_irq_unmask(rngc); - /* seed creation */ cmd = readl(rngc->base + RNGC_COMMAND); writel(cmd | RNGC_CMD_SEED, rngc->base + RNGC_COMMAND); @@ -197,14 +191,16 @@ static int imx_rngc_init(struct hwrng *rng) RNGC_TIMEOUT); if (!ret) { - imx_rngc_irq_mask_clear(rngc); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto err; } } while (rngc->err_reg == RNGC_ERROR_STATUS_STAT_ERR); - if (rngc->err_reg) - return -EIO; + if (rngc->err_reg) { + ret = -EIO; + goto err; + } /* * enable automatic seeding, the rngc creates a new seed automatically @@ -214,7 +210,23 @@ static int imx_rngc_init(struct hwrng *rng) ctrl |= RNGC_CTRL_AUTO_SEED; writel(ctrl, rngc->base + RNGC_CONTROL); + /* + * if initialisation was successful, we keep the interrupt + * unmasked until imx_rngc_cleanup is called + * we mask the interrupt ourselves if we return an error + */ return 0; + +err: + imx_rngc_irq_mask_clear(rngc); + return ret; +} + +static void imx_rngc_cleanup(struct hwrng *rng) +{ + struct imx_rngc *rngc = container_of(rng, struct imx_rngc, rng); + + imx_rngc_irq_mask_clear(rngc); } static int imx_rngc_probe(struct platform_device *pdev) @@ -272,6 +284,7 @@ static int imx_rngc_probe(struct platform_device *pdev) rngc->rng.name = pdev->name; rngc->rng.init = imx_rngc_init; rngc->rng.read = imx_rngc_read; + rngc->rng.cleanup = imx_rngc_cleanup; rngc->dev = &pdev->dev; platform_set_drvdata(pdev, rngc); From b61028135843d5418f0469d11e2769ca77159463 Mon Sep 17 00:00:00 2001 From: Connor Kuehl Date: Fri, 6 Mar 2020 09:20:10 -0800 Subject: [PATCH 132/162] crypto: ccp - use file mode for sev ioctl permissions Instead of using CAP_SYS_ADMIN which is restricted to the root user, check the file mode for write permissions before executing commands that can affect the platform. This allows for more fine-grained access control to the SEV ioctl interface. This would allow a SEV-only user or group the ability to administer the platform without requiring them to be root or granting them overly powerful permissions. For example: chown root:root /dev/sev chmod 600 /dev/sev setfacl -m g:sev:r /dev/sev setfacl -m g:sev-admin:rw /dev/sev In this instance, members of the "sev-admin" group have the ability to perform all ioctl calls (including the ones that modify platform state). Members of the "sev" group only have access to the ioctls that do not modify the platform state. This also makes opening "/dev/sev" more consistent with how file descriptors are usually handled. By only checking for CAP_SYS_ADMIN, the file descriptor could be opened read-only but could still execute ioctls that modify the platform state. This patch enforces that the file descriptor is opened with write privileges if it is going to be used to modify the platform state. This flexibility is completely opt-in, and if it is not desirable by the administrator then they do not need to give anyone else access to /dev/sev. 
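To illustrate the resulting userspace view, a query-only client now needs nothing more than a readable descriptor (a sketch, not part of this patch; command and structure names are taken from the existing uapi header linux/psp-sev.h):

	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/psp-sev.h>

	/* sketch: query the platform status through a read-only fd */
	int sev_query_status(struct sev_user_data_status *status)
	{
		struct sev_issue_cmd input = {
			.cmd = SEV_PLATFORM_STATUS,
			.data = (__u64)(unsigned long)status,
		};
		int fd = open("/dev/sev", O_RDONLY);	/* "sev" group: read is enough */
		int rc;

		if (fd < 0)
			return -1;

		/* the same fd would get EPERM for e.g. SEV_FACTORY_RESET */
		rc = ioctl(fd, SEV_ISSUE_CMD, &input);
		close(fd);
		return rc;
	}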
Signed-off-by: Connor Kuehl Reviewed-by: Brijesh Singh Reviewed-by: Bandan Das Acked-by: David Rientjes Acked-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sev-dev.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index aa591dae067c..896f190b9a50 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -283,11 +283,11 @@ static int sev_get_platform_state(int *state, int *error) return rc; } -static int sev_ioctl_do_reset(struct sev_issue_cmd *argp) +static int sev_ioctl_do_reset(struct sev_issue_cmd *argp, bool writable) { int state, rc; - if (!capable(CAP_SYS_ADMIN)) + if (!writable) return -EPERM; /* @@ -331,12 +331,12 @@ static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp) return ret; } -static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp) +static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp, bool writable) { struct sev_device *sev = psp_master->sev_data; int rc; - if (!capable(CAP_SYS_ADMIN)) + if (!writable) return -EPERM; if (sev->state == SEV_STATE_UNINIT) { @@ -348,7 +348,7 @@ static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp) return __sev_do_cmd_locked(cmd, NULL, &argp->error); } -static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp) +static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp, bool writable) { struct sev_device *sev = psp_master->sev_data; struct sev_user_data_pek_csr input; @@ -356,7 +356,7 @@ static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp) void *blob = NULL; int ret; - if (!capable(CAP_SYS_ADMIN)) + if (!writable) return -EPERM; if (copy_from_user(&input, (void __user *)argp->data, sizeof(input))) @@ -539,7 +539,7 @@ fw_err: return ret; } -static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp) +static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp, bool writable) { struct sev_device *sev = psp_master->sev_data; struct sev_user_data_pek_cert_import input; @@ -547,7 +547,7 @@ static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp) void *pek_blob, *oca_blob; int ret; - if (!capable(CAP_SYS_ADMIN)) + if (!writable) return -EPERM; if (copy_from_user(&input, (void __user *)argp->data, sizeof(input))) @@ -698,7 +698,7 @@ static int sev_ioctl_do_get_id(struct sev_issue_cmd *argp) return ret; } -static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp) +static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable) { struct sev_device *sev = psp_master->sev_data; struct sev_user_data_pdh_cert_export input; @@ -708,7 +708,7 @@ static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp) /* If platform is not in INIT state then transition it to INIT. 
*/ if (sev->state != SEV_STATE_INIT) { - if (!capable(CAP_SYS_ADMIN)) + if (!writable) return -EPERM; ret = __sev_platform_init_locked(&argp->error); @@ -801,6 +801,7 @@ static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) void __user *argp = (void __user *)arg; struct sev_issue_cmd input; int ret = -EFAULT; + bool writable = file->f_mode & FMODE_WRITE; if (!psp_master || !psp_master->sev_data) return -ENODEV; @@ -819,25 +820,25 @@ static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) switch (input.cmd) { case SEV_FACTORY_RESET: - ret = sev_ioctl_do_reset(&input); + ret = sev_ioctl_do_reset(&input, writable); break; case SEV_PLATFORM_STATUS: ret = sev_ioctl_do_platform_status(&input); break; case SEV_PEK_GEN: - ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PEK_GEN, &input); + ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PEK_GEN, &input, writable); break; case SEV_PDH_GEN: - ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PDH_GEN, &input); + ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PDH_GEN, &input, writable); break; case SEV_PEK_CSR: - ret = sev_ioctl_do_pek_csr(&input); + ret = sev_ioctl_do_pek_csr(&input, writable); break; case SEV_PEK_CERT_IMPORT: - ret = sev_ioctl_do_pek_import(&input); + ret = sev_ioctl_do_pek_import(&input, writable); break; case SEV_PDH_CERT_EXPORT: - ret = sev_ioctl_do_pdh_export(&input); + ret = sev_ioctl_do_pdh_export(&input, writable); break; case SEV_GET_ID: pr_warn_once("SEV_GET_ID command is deprecated, use SEV_GET_ID2\n"); From d8215ff1265fb711227f389fdf6c5b65c7120873 Mon Sep 17 00:00:00 2001 From: Hadar Gat Date: Sun, 8 Mar 2020 17:57:04 +0200 Subject: [PATCH 133/162] crypto: ccree - update register handling macros Changed CC_GENMASK macro so it can be used for all HW registers. Signed-off-by: Hadar Gat Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_driver.h | 4 +- drivers/crypto/ccree/cc_hw_queue_defs.h | 73 ++++++++++++------------- 2 files changed, 36 insertions(+), 41 deletions(-) diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h index 4790eb5cb8bd..a0fbf254f54b 100644 --- a/drivers/crypto/ccree/cc_driver.h +++ b/drivers/crypto/ccree/cc_driver.h @@ -71,9 +71,7 @@ enum cc_std_body { #define CC_NVM_IS_IDLE_MASK BIT(CC_NVM_IS_IDLE_VALUE_BIT_SHIFT) -#define AXIM_MON_COMP_VALUE GENMASK(CC_AXIM_MON_COMP_VALUE_BIT_SIZE + \ - CC_AXIM_MON_COMP_VALUE_BIT_SHIFT, \ - CC_AXIM_MON_COMP_VALUE_BIT_SHIFT) +#define AXIM_MON_COMP_VALUE CC_GENMASK(CC_AXIM_MON_COMP_VALUE) #define CC_CPP_AES_ABORT_MASK ( \ BIT(CC_HOST_IMR_REE_OP_ABORTED_AES_0_MASK_BIT_SHIFT) | \ diff --git a/drivers/crypto/ccree/cc_hw_queue_defs.h b/drivers/crypto/ccree/cc_hw_queue_defs.h index 25ef28669989..15df58c66911 100644 --- a/drivers/crypto/ccree/cc_hw_queue_defs.h +++ b/drivers/crypto/ccree/cc_hw_queue_defs.h @@ -17,46 +17,43 @@ /* Define max. 
available slots in HW queue */ #define HW_QUEUE_SLOTS_MAX 15 -#define CC_REG_LOW(word, name) \ - (CC_DSCRPTR_QUEUE_WORD ## word ## _ ## name ## _BIT_SHIFT) +#define CC_REG_LOW(name) (name ## _BIT_SHIFT) +#define CC_REG_HIGH(name) (CC_REG_LOW(name) + name ## _BIT_SIZE - 1) +#define CC_GENMASK(name) GENMASK(CC_REG_HIGH(name), CC_REG_LOW(name)) -#define CC_REG_HIGH(word, name) \ - (CC_REG_LOW(word, name) + \ - CC_DSCRPTR_QUEUE_WORD ## word ## _ ## name ## _BIT_SIZE - 1) +#define CC_HWQ_GENMASK(word, field) \ + CC_GENMASK(CC_DSCRPTR_QUEUE_WORD ## word ## _ ## field) -#define CC_GENMASK(word, name) \ - GENMASK(CC_REG_HIGH(word, name), CC_REG_LOW(word, name)) - -#define WORD0_VALUE CC_GENMASK(0, VALUE) -#define WORD0_CPP_CIPHER_MODE CC_GENMASK(0, CPP_CIPHER_MODE) -#define WORD1_DIN_CONST_VALUE CC_GENMASK(1, DIN_CONST_VALUE) -#define WORD1_DIN_DMA_MODE CC_GENMASK(1, DIN_DMA_MODE) -#define WORD1_DIN_SIZE CC_GENMASK(1, DIN_SIZE) -#define WORD1_NOT_LAST CC_GENMASK(1, NOT_LAST) -#define WORD1_NS_BIT CC_GENMASK(1, NS_BIT) -#define WORD1_LOCK_QUEUE CC_GENMASK(1, LOCK_QUEUE) -#define WORD2_VALUE CC_GENMASK(2, VALUE) -#define WORD3_DOUT_DMA_MODE CC_GENMASK(3, DOUT_DMA_MODE) -#define WORD3_DOUT_LAST_IND CC_GENMASK(3, DOUT_LAST_IND) -#define WORD3_DOUT_SIZE CC_GENMASK(3, DOUT_SIZE) -#define WORD3_HASH_XOR_BIT CC_GENMASK(3, HASH_XOR_BIT) -#define WORD3_NS_BIT CC_GENMASK(3, NS_BIT) -#define WORD3_QUEUE_LAST_IND CC_GENMASK(3, QUEUE_LAST_IND) -#define WORD4_ACK_NEEDED CC_GENMASK(4, ACK_NEEDED) -#define WORD4_AES_SEL_N_HASH CC_GENMASK(4, AES_SEL_N_HASH) -#define WORD4_AES_XOR_CRYPTO_KEY CC_GENMASK(4, AES_XOR_CRYPTO_KEY) -#define WORD4_BYTES_SWAP CC_GENMASK(4, BYTES_SWAP) -#define WORD4_CIPHER_CONF0 CC_GENMASK(4, CIPHER_CONF0) -#define WORD4_CIPHER_CONF1 CC_GENMASK(4, CIPHER_CONF1) -#define WORD4_CIPHER_CONF2 CC_GENMASK(4, CIPHER_CONF2) -#define WORD4_CIPHER_DO CC_GENMASK(4, CIPHER_DO) -#define WORD4_CIPHER_MODE CC_GENMASK(4, CIPHER_MODE) -#define WORD4_CMAC_SIZE0 CC_GENMASK(4, CMAC_SIZE0) -#define WORD4_DATA_FLOW_MODE CC_GENMASK(4, DATA_FLOW_MODE) -#define WORD4_KEY_SIZE CC_GENMASK(4, KEY_SIZE) -#define WORD4_SETUP_OPERATION CC_GENMASK(4, SETUP_OPERATION) -#define WORD5_DIN_ADDR_HIGH CC_GENMASK(5, DIN_ADDR_HIGH) -#define WORD5_DOUT_ADDR_HIGH CC_GENMASK(5, DOUT_ADDR_HIGH) +#define WORD0_VALUE CC_HWQ_GENMASK(0, VALUE) +#define WORD0_CPP_CIPHER_MODE CC_HWQ_GENMASK(0, CPP_CIPHER_MODE) +#define WORD1_DIN_CONST_VALUE CC_HWQ_GENMASK(1, DIN_CONST_VALUE) +#define WORD1_DIN_DMA_MODE CC_HWQ_GENMASK(1, DIN_DMA_MODE) +#define WORD1_DIN_SIZE CC_HWQ_GENMASK(1, DIN_SIZE) +#define WORD1_NOT_LAST CC_HWQ_GENMASK(1, NOT_LAST) +#define WORD1_NS_BIT CC_HWQ_GENMASK(1, NS_BIT) +#define WORD1_LOCK_QUEUE CC_HWQ_GENMASK(1, LOCK_QUEUE) +#define WORD2_VALUE CC_HWQ_GENMASK(2, VALUE) +#define WORD3_DOUT_DMA_MODE CC_HWQ_GENMASK(3, DOUT_DMA_MODE) +#define WORD3_DOUT_LAST_IND CC_HWQ_GENMASK(3, DOUT_LAST_IND) +#define WORD3_DOUT_SIZE CC_HWQ_GENMASK(3, DOUT_SIZE) +#define WORD3_HASH_XOR_BIT CC_HWQ_GENMASK(3, HASH_XOR_BIT) +#define WORD3_NS_BIT CC_HWQ_GENMASK(3, NS_BIT) +#define WORD3_QUEUE_LAST_IND CC_HWQ_GENMASK(3, QUEUE_LAST_IND) +#define WORD4_ACK_NEEDED CC_HWQ_GENMASK(4, ACK_NEEDED) +#define WORD4_AES_SEL_N_HASH CC_HWQ_GENMASK(4, AES_SEL_N_HASH) +#define WORD4_AES_XOR_CRYPTO_KEY CC_HWQ_GENMASK(4, AES_XOR_CRYPTO_KEY) +#define WORD4_BYTES_SWAP CC_HWQ_GENMASK(4, BYTES_SWAP) +#define WORD4_CIPHER_CONF0 CC_HWQ_GENMASK(4, CIPHER_CONF0) +#define WORD4_CIPHER_CONF1 CC_HWQ_GENMASK(4, CIPHER_CONF1) +#define WORD4_CIPHER_CONF2 CC_HWQ_GENMASK(4, 
CIPHER_CONF2) +#define WORD4_CIPHER_DO CC_HWQ_GENMASK(4, CIPHER_DO) +#define WORD4_CIPHER_MODE CC_HWQ_GENMASK(4, CIPHER_MODE) +#define WORD4_CMAC_SIZE0 CC_HWQ_GENMASK(4, CMAC_SIZE0) +#define WORD4_DATA_FLOW_MODE CC_HWQ_GENMASK(4, DATA_FLOW_MODE) +#define WORD4_KEY_SIZE CC_HWQ_GENMASK(4, KEY_SIZE) +#define WORD4_SETUP_OPERATION CC_HWQ_GENMASK(4, SETUP_OPERATION) +#define WORD5_DIN_ADDR_HIGH CC_HWQ_GENMASK(5, DIN_ADDR_HIGH) +#define WORD5_DOUT_ADDR_HIGH CC_HWQ_GENMASK(5, DOUT_ADDR_HIGH) /****************************************************************************** * TYPE DEFINITIONS From 8b9d6e982db7a725b7538057aff139716fe23896 Mon Sep 17 00:00:00 2001 From: Hadar Gat Date: Sun, 8 Mar 2020 17:57:05 +0200 Subject: [PATCH 134/162] crypto: ccree - remove pointless comment removed pointless comment Signed-off-by: Hadar Gat Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_driver.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h index a0fbf254f54b..d938886390d2 100644 --- a/drivers/crypto/ccree/cc_driver.h +++ b/drivers/crypto/ccree/cc_driver.h @@ -26,7 +26,6 @@ #include #include -/* Registers definitions from shared/hw/ree_include */ #include "cc_host_regs.h" #include "cc_crypto_ctx.h" #include "cc_hw_queue_defs.h" From 6429ccdd4f2d873db73ca60342669e160b0a7226 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Sun, 8 Mar 2020 17:57:06 +0200 Subject: [PATCH 135/162] crypto: ccree - remove ancient TODO remarks Remove left over ancient and now misleading TODO remarks. Signed-off-by: Gilad Ben-Yossef Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_aead.c | 1 - drivers/crypto/ccree/cc_buffer_mgr.c | 2 -- drivers/crypto/ccree/cc_cipher.c | 1 - drivers/crypto/ccree/cc_hash.c | 3 --- drivers/crypto/ccree/cc_request_mgr.c | 1 - 5 files changed, 8 deletions(-) diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index 904aa66752b3..1c5d927d632c 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -1990,7 +1990,6 @@ static int cc_proc_aead(struct aead_request *req, /* Load MLLI tables to SRAM if necessary */ cc_mlli_to_sram(req, desc, &seq_len); - /*TODO: move seq len by reference */ switch (ctx->auth_mode) { case DRV_HASH_SHA1: case DRV_HASH_SHA256: diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index 1ea4812e9354..526da532a0a3 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -606,7 +606,6 @@ static int cc_aead_chain_iv(struct cc_drvdata *drvdata, dev_dbg(dev, "Mapped iv %u B at va=%pK to dma=%pad\n", hw_iv_size, req->iv, &areq_ctx->gen_ctx.iv_dma_addr); - // TODO: what about CTR?? 
ask Ron if (do_chain && areq_ctx->plaintext_authenticate_only) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); unsigned int iv_size_to_authenc = crypto_aead_ivsize(tfm); @@ -1225,7 +1224,6 @@ int cc_map_hash_request_final(struct cc_drvdata *drvdata, void *ctx, return 0; } - /*TODO: copy data in case that buffer is enough for operation */ /* map the previous buffer */ if (*curr_buff_cnt) { rc = cc_set_hash_buf(dev, areq_ctx, curr_buff, *curr_buff_cnt, diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c index d8e6a103a637..a84335328f37 100644 --- a/drivers/crypto/ccree/cc_cipher.c +++ b/drivers/crypto/ccree/cc_cipher.c @@ -859,7 +859,6 @@ static int cc_cipher_process(struct skcipher_request *req, /* STAT_PHASE_0: Init and sanity checks */ - /* TODO: check data length according to mode */ if (validate_data_size(ctx_p, nbytes)) { dev_dbg(dev, "Unsupported data size %d.\n", nbytes); rc = -EINVAL; diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index 0c32aa2e6801..d5310783af15 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -349,7 +349,6 @@ static int cc_fin_result(struct cc_hw_desc *desc, struct ahash_request *req, /* Get final MAC result */ hw_desc_init(&desc[idx]); set_hash_cipher_mode(&desc[idx], ctx->hw_mode, ctx->hash_mode); - /* TODO */ set_dout_dlli(&desc[idx], state->digest_result_dma_addr, digestsize, NS_BIT, 1); set_queue_last_ind(ctx->drvdata, &desc[idx]); @@ -1319,7 +1318,6 @@ static int cc_mac_final(struct ahash_request *req) /* Get final MAC result */ hw_desc_init(&desc[idx]); - /* TODO */ set_dout_dlli(&desc[idx], state->digest_result_dma_addr, digestsize, NS_BIT, 1); set_queue_last_ind(ctx->drvdata, &desc[idx]); @@ -1401,7 +1399,6 @@ static int cc_mac_finup(struct ahash_request *req) /* Get final MAC result */ hw_desc_init(&desc[idx]); - /* TODO */ set_dout_dlli(&desc[idx], state->digest_result_dma_addr, digestsize, NS_BIT, 1); set_queue_last_ind(ctx->drvdata, &desc[idx]); diff --git a/drivers/crypto/ccree/cc_request_mgr.c b/drivers/crypto/ccree/cc_request_mgr.c index 2671cffb3b58..1d7649ecf44e 100644 --- a/drivers/crypto/ccree/cc_request_mgr.c +++ b/drivers/crypto/ccree/cc_request_mgr.c @@ -296,7 +296,6 @@ static void cc_do_send_request(struct cc_drvdata *drvdata, req_mgr_h->req_queue[req_mgr_h->req_queue_head] = *cc_req; req_mgr_h->req_queue_head = (req_mgr_h->req_queue_head + 1) & (MAX_REQUEST_QUEUE_SIZE - 1); - /* TODO: Use circ_buf.h ? */ dev_dbg(dev, "Enqueue request head=%u\n", req_mgr_h->req_queue_head); From 69cd3e16c0d6f7dfe92c969a43221a802ee549ae Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Sun, 8 Mar 2020 17:57:07 +0200 Subject: [PATCH 136/162] crypto: ccree - only check condition if needed Move the testing of the condition to after the point where we decide whether we need it.
Signed-off-by: Gilad Ben-Yossef Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_aead.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index 1c5d927d632c..cce103e3b822 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -1800,12 +1800,6 @@ static int cc_gcm(struct aead_request *req, struct cc_hw_desc desc[], struct aead_req_ctx *req_ctx = aead_request_ctx(req); unsigned int cipher_flow_mode; - if (req_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT) { - cipher_flow_mode = AES_and_HASH; - } else { /* Encrypt */ - cipher_flow_mode = AES_to_HASH_and_DOUT; - } - //in RFC4543 no data to encrypt. just copy data from src to dest. if (req_ctx->plaintext_authenticate_only) { cc_proc_cipher_desc(req, BYPASS, desc, seq_size); @@ -1817,6 +1811,12 @@ static int cc_gcm(struct aead_request *req, struct cc_hw_desc desc[], return 0; } + if (req_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT) { + cipher_flow_mode = AES_and_HASH; + } else { /* Encrypt */ + cipher_flow_mode = AES_to_HASH_and_DOUT; + } + // for gcm and rfc4106. cc_set_ghash_desc(req, desc, seq_size); /* process(ghash) assoc data */ From b66c1876a756d074965d87c026ca19ff2277280f Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Sun, 8 Mar 2020 17:57:08 +0200 Subject: [PATCH 137/162] crypto: ccree - use crypto_ipsec_check_assoclen() Use crypto_ipsec_check_assoclen() instead of home grown functions. Clean up some unneeded code as a result. Delete stale comments while we're at it. Signed-off-by: Gilad Ben-Yossef Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_aead.c | 73 ++++++++++------------------------ 1 file changed, 20 insertions(+), 53 deletions(-) diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index cce103e3b822..ede16e37d453 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include #include "cc_driver.h" @@ -60,11 +62,6 @@ struct cc_aead_ctx { enum drv_hash_mode auth_mode; }; -static inline bool valid_assoclen(struct aead_request *req) -{ - return ((req->assoclen == 16) || (req->assoclen == 20)); -} - static void cc_aead_exit(struct crypto_aead *tfm) { struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); @@ -2050,15 +2047,11 @@ static int cc_rfc4309_ccm_encrypt(struct aead_request *req) /* Very similar to cc_aead_encrypt() above. 
*/ struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); @@ -2099,16 +2092,12 @@ static int cc_aead_decrypt(struct aead_request *req) static int cc_rfc4309_ccm_decrypt(struct aead_request *req) { - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); @@ -2216,18 +2205,12 @@ static int cc_rfc4543_gcm_setauthsize(struct crypto_aead *authenc, static int cc_rfc4106_gcm_encrypt(struct aead_request *req) { - /* Very similar to cc_aead_encrypt() above. */ - - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); @@ -2248,17 +2231,12 @@ out: static int cc_rfc4543_gcm_encrypt(struct aead_request *req) { - /* Very similar to cc_aead_encrypt() above. */ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); @@ -2281,18 +2259,12 @@ out: static int cc_rfc4106_gcm_decrypt(struct aead_request *req) { - /* Very similar to cc_aead_decrypt() above. */ - - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); @@ -2313,17 +2285,12 @@ out: static int cc_rfc4543_gcm_decrypt(struct aead_request *req) { - /* Very similar to cc_aead_decrypt() above. 
*/ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct device *dev = drvdata_to_dev(ctx->drvdata); struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - int rc = -EINVAL; + int rc; - if (!valid_assoclen(req)) { - dev_dbg(dev, "invalid Assoclen:%u\n", req->assoclen); + rc = crypto_ipsec_check_assoclen(req->assoclen); + if (rc) goto out; - } memset(areq_ctx, 0, sizeof(*areq_ctx)); From 0eae14a067c5b2596c5a1049561cbc86b990c7ce Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Sun, 8 Mar 2020 17:57:09 +0200 Subject: [PATCH 138/162] crypto: ccree - refactor AEAD IV in AAD handling Our handling of ciphers with IV trailing the AAD was correct but overly complicated. Refactor to simplify and possibly save one DMA burst. This has the added bonus of behaving the same as the generic rfc4543 implementation for non-compliant inputs where the IV in the iv field was not the same as the IV in the AAD. There should be no change in behaviour with correct inputs. Signed-off-by: Gilad Ben-Yossef Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_aead.c | 27 ++------- drivers/crypto/ccree/cc_aead.h | 3 +- drivers/crypto/ccree/cc_buffer_mgr.c | 89 ++++------------------------ 3 files changed, 16 insertions(+), 103 deletions(-) diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index ede16e37d453..875fa79a03eb 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -1609,7 +1609,6 @@ static void cc_proc_rfc4309_ccm(struct aead_request *req) memcpy(areq_ctx->ctr_iv + CCM_BLOCK_IV_OFFSET, req->iv, CCM_BLOCK_IV_SIZE); req->iv = areq_ctx->ctr_iv; - areq_ctx->assoclen -= CCM_BLOCK_IV_SIZE; } static void cc_set_ghash_desc(struct aead_request *req, @@ -1868,8 +1867,7 @@ static int config_gcm_context(struct aead_request *req) */ __be64 temp64; - temp64 = cpu_to_be64((req_ctx->assoclen + - GCM_BLOCK_RFC4_IV_SIZE + cryptlen) * 8); + temp64 = cpu_to_be64((req_ctx->assoclen + cryptlen) * 8); memcpy(&req_ctx->gcm_len_block.len_a, &temp64, sizeof(temp64)); temp64 = 0; memcpy(&req_ctx->gcm_len_block.len_c, &temp64, 8); @@ -1889,7 +1887,6 @@ static void cc_proc_rfc4_gcm(struct aead_request *req) memcpy(areq_ctx->ctr_iv + GCM_BLOCK_RFC4_IV_OFFSET, req->iv, GCM_BLOCK_RFC4_IV_SIZE); req->iv = areq_ctx->ctr_iv; - areq_ctx->assoclen -= GCM_BLOCK_RFC4_IV_SIZE; } static int cc_proc_aead(struct aead_request *req, @@ -2031,9 +2028,6 @@ static int cc_aead_encrypt(struct aead_request *req) /* No generated IV required */ areq_ctx->backup_iv = req->iv; areq_ctx->assoclen = req->assoclen; - areq_ctx->is_gcm4543 = false; - - areq_ctx->plaintext_authenticate_only = false; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_ENCRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2057,8 +2051,7 @@ static int cc_rfc4309_ccm_encrypt(struct aead_request *req) /* No generated IV required */
areq_ctx->backup_iv = req->iv; - areq_ctx->assoclen = req->assoclen; + areq_ctx->assoclen = req->assoclen - CCM_BLOCK_IV_SIZE; - areq_ctx->is_gcm4543 = true; cc_proc_rfc4309_ccm(req); rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT); @@ -2216,11 +2205,9 @@ static int cc_rfc4106_gcm_encrypt(struct aead_request *req) /* No generated IV required */ areq_ctx->backup_iv = req->iv; - areq_ctx->assoclen = req->assoclen; - areq_ctx->plaintext_authenticate_only = false; + areq_ctx->assoclen = req->assoclen - GCM_BLOCK_RFC4_IV_SIZE; cc_proc_rfc4_gcm(req); - areq_ctx->is_gcm4543 = true; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_ENCRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2248,7 +2235,6 @@ static int cc_rfc4543_gcm_encrypt(struct aead_request *req) areq_ctx->assoclen = req->assoclen; cc_proc_rfc4_gcm(req); - areq_ctx->is_gcm4543 = true; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_ENCRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2270,11 +2256,9 @@ static int cc_rfc4106_gcm_decrypt(struct aead_request *req) /* No generated IV required */ areq_ctx->backup_iv = req->iv; - areq_ctx->assoclen = req->assoclen; - areq_ctx->plaintext_authenticate_only = false; + areq_ctx->assoclen = req->assoclen - GCM_BLOCK_RFC4_IV_SIZE; cc_proc_rfc4_gcm(req); - areq_ctx->is_gcm4543 = true; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) @@ -2302,7 +2286,6 @@ static int cc_rfc4543_gcm_decrypt(struct aead_request *req) areq_ctx->assoclen = req->assoclen; cc_proc_rfc4_gcm(req); - areq_ctx->is_gcm4543 = true; rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT); if (rc != -EINPROGRESS && rc != -EBUSY) diff --git a/drivers/crypto/ccree/cc_aead.h b/drivers/crypto/ccree/cc_aead.h index f12169b57f9d..b69591550730 100644 --- a/drivers/crypto/ccree/cc_aead.h +++ b/drivers/crypto/ccree/cc_aead.h @@ -66,7 +66,7 @@ struct aead_req_ctx { /* used to prevent cache coherence problem */ u8 backup_mac[MAX_MAC_SIZE]; u8 *backup_iv; /* store orig iv */ - u32 assoclen; /* internal assoclen */ + u32 assoclen; /* size of AAD buffer to authenticate */ dma_addr_t mac_buf_dma_addr; /* internal ICV DMA buffer */ /* buffer for internal ccm configurations */ dma_addr_t ccm_iv0_dma_addr; @@ -79,7 +79,6 @@ struct aead_req_ctx { dma_addr_t gcm_iv_inc2_dma_addr; dma_addr_t hkey_dma_addr; /* Phys. address of hkey */ dma_addr_t gcm_block_len_dma_addr; /* Phys. address of gcm block len */ - bool is_gcm4543; u8 *icv_virt_addr; /* Virt. 
address of ICV */ struct async_gen_req_ctx gen_ctx; diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index 526da532a0a3..b2bd093e7013 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -13,12 +13,6 @@ #include "cc_hash.h" #include "cc_aead.h" -enum dma_buffer_type { - DMA_NULL_TYPE = -1, - DMA_SGL_TYPE = 1, - DMA_BUFF_TYPE = 2, -}; - union buffer_array_entry { struct scatterlist *sgl; dma_addr_t buffer_dma; @@ -30,7 +24,6 @@ struct buffer_array { unsigned int offset[MAX_NUM_OF_BUFFERS_IN_MLLI]; int nents[MAX_NUM_OF_BUFFERS_IN_MLLI]; int total_data_len[MAX_NUM_OF_BUFFERS_IN_MLLI]; - enum dma_buffer_type type[MAX_NUM_OF_BUFFERS_IN_MLLI]; bool is_last[MAX_NUM_OF_BUFFERS_IN_MLLI]; u32 *mlli_nents[MAX_NUM_OF_BUFFERS_IN_MLLI]; }; @@ -60,11 +53,7 @@ static void cc_copy_mac(struct device *dev, struct aead_request *req, enum cc_sg_cpy_direct dir) { struct aead_req_ctx *areq_ctx = aead_request_ctx(req); - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - u32 skip = areq_ctx->assoclen + req->cryptlen; - - if (areq_ctx->is_gcm4543) - skip += crypto_aead_ivsize(tfm); + u32 skip = req->assoclen + req->cryptlen; cc_copy_sg_portion(dev, areq_ctx->backup_mac, req->src, (skip - areq_ctx->req_authsize), skip, dir); @@ -216,14 +205,8 @@ static int cc_generate_mlli(struct device *dev, struct buffer_array *sg_data, u32 tot_len = sg_data->total_data_len[i]; u32 offset = sg_data->offset[i]; - if (sg_data->type[i] == DMA_SGL_TYPE) - rc = cc_render_sg_to_mlli(dev, entry->sgl, tot_len, - offset, &total_nents, - &mlli_p); - else /*DMA_BUFF_TYPE*/ - rc = cc_render_buff_to_mlli(dev, entry->buffer_dma, - tot_len, &total_nents, - &mlli_p); + rc = cc_render_sg_to_mlli(dev, entry->sgl, tot_len, offset, + &total_nents, &mlli_p); if (rc) return rc; @@ -249,27 +232,6 @@ build_mlli_exit: return rc; } -static void cc_add_buffer_entry(struct device *dev, - struct buffer_array *sgl_data, - dma_addr_t buffer_dma, unsigned int buffer_len, - bool is_last_entry, u32 *mlli_nents) -{ - unsigned int index = sgl_data->num_of_buffers; - - dev_dbg(dev, "index=%u single_buff=%pad buffer_len=0x%08X is_last=%d\n", - index, &buffer_dma, buffer_len, is_last_entry); - sgl_data->nents[index] = 1; - sgl_data->entry[index].buffer_dma = buffer_dma; - sgl_data->offset[index] = 0; - sgl_data->total_data_len[index] = buffer_len; - sgl_data->type[index] = DMA_BUFF_TYPE; - sgl_data->is_last[index] = is_last_entry; - sgl_data->mlli_nents[index] = mlli_nents; - if (sgl_data->mlli_nents[index]) - *sgl_data->mlli_nents[index] = 0; - sgl_data->num_of_buffers++; -} - static void cc_add_sg_entry(struct device *dev, struct buffer_array *sgl_data, unsigned int nents, struct scatterlist *sgl, unsigned int data_len, unsigned int data_offset, @@ -283,7 +245,6 @@ static void cc_add_sg_entry(struct device *dev, struct buffer_array *sgl_data, sgl_data->entry[index].sgl = sgl; sgl_data->offset[index] = data_offset; sgl_data->total_data_len[index] = data_len; - sgl_data->type[index] = DMA_SGL_TYPE; sgl_data->is_last[index] = is_last_table; sgl_data->mlli_nents[index] = mlli_nents; if (sgl_data->mlli_nents[index]) @@ -606,17 +567,6 @@ static int cc_aead_chain_iv(struct cc_drvdata *drvdata, dev_dbg(dev, "Mapped iv %u B at va=%pK to dma=%pad\n", hw_iv_size, req->iv, &areq_ctx->gen_ctx.iv_dma_addr); - if (do_chain && areq_ctx->plaintext_authenticate_only) { - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - unsigned int iv_size_to_authenc = crypto_aead_ivsize(tfm); - unsigned int 
iv_ofs = GCM_BLOCK_RFC4_IV_OFFSET; - /* Chain to given list */ - cc_add_buffer_entry(dev, sg_data, - (areq_ctx->gen_ctx.iv_dma_addr + iv_ofs), - iv_size_to_authenc, is_last, - &areq_ctx->assoc.mlli_nents); - areq_ctx->assoc_buff_type = CC_DMA_BUF_MLLI; - } chain_iv_exit: return rc; @@ -630,13 +580,8 @@ static int cc_aead_chain_assoc(struct cc_drvdata *drvdata, struct aead_req_ctx *areq_ctx = aead_request_ctx(req); int rc = 0; int mapped_nents = 0; - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - unsigned int size_of_assoc = areq_ctx->assoclen; struct device *dev = drvdata_to_dev(drvdata); - if (areq_ctx->is_gcm4543) - size_of_assoc += crypto_aead_ivsize(tfm); - if (!sg_data) { rc = -EINVAL; goto chain_assoc_exit; @@ -652,7 +597,7 @@ static int cc_aead_chain_assoc(struct cc_drvdata *drvdata, goto chain_assoc_exit; } - mapped_nents = sg_nents_for_len(req->src, size_of_assoc); + mapped_nents = sg_nents_for_len(req->src, areq_ctx->assoclen); if (mapped_nents < 0) return mapped_nents; @@ -845,16 +790,11 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, u32 src_mapped_nents = 0, dst_mapped_nents = 0; u32 offset = 0; /* non-inplace mode */ - unsigned int size_for_map = areq_ctx->assoclen + req->cryptlen; - struct crypto_aead *tfm = crypto_aead_reqtfm(req); + unsigned int size_for_map = req->assoclen + req->cryptlen; u32 sg_index = 0; - bool is_gcm4543 = areq_ctx->is_gcm4543; - u32 size_to_skip = areq_ctx->assoclen; + u32 size_to_skip = req->assoclen; struct scatterlist *sgl; - if (is_gcm4543) - size_to_skip += crypto_aead_ivsize(tfm); - offset = size_to_skip; if (!sg_data) @@ -863,9 +803,6 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, areq_ctx->src_sgl = req->src; areq_ctx->dst_sgl = req->dst; - if (is_gcm4543) - size_for_map += crypto_aead_ivsize(tfm); - size_for_map += (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) ? 
authsize : 0; src_mapped_nents = cc_get_sgl_nents(dev, req->src, size_for_map, @@ -892,16 +829,13 @@ areq_ctx->src_offset = offset; if (req->src != req->dst) { - size_for_map = areq_ctx->assoclen + req->cryptlen; + size_for_map = req->assoclen + req->cryptlen; if (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) size_for_map += authsize; else size_for_map -= authsize; - if (is_gcm4543) - size_for_map += crypto_aead_ivsize(tfm); - rc = cc_map_sg(dev, req->dst, size_for_map, DMA_BIDIRECTIONAL, &areq_ctx->dst.mapped_nents, LLI_MAX_NUM_OF_DATA_ENTRIES, &dst_last_bytes, @@ -1008,12 +942,10 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) struct buffer_array sg_data; unsigned int authsize = areq_ctx->req_authsize; int rc = 0; - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - bool is_gcm4543 = areq_ctx->is_gcm4543; dma_addr_t dma_addr; u32 mapped_nents = 0; u32 dummy = 0; /*used for the assoc data fragments */ - u32 size_to_map = 0; + u32 size_to_map; gfp_t flags = cc_gfp_flags(&req->base); mlli_params->curr_pool = NULL; @@ -1110,14 +1042,13 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) areq_ctx->gcm_iv_inc2_dma_addr = dma_addr; } - size_to_map = req->cryptlen + areq_ctx->assoclen; + size_to_map = req->cryptlen + req->assoclen; /* If we do in-place encryption, we also need the auth tag */ if ((areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_ENCRYPT) && (req->src == req->dst)) { size_to_map += authsize; } - if (is_gcm4543) - size_to_map += crypto_aead_ivsize(tfm); + rc = cc_map_sg(dev, req->src, size_to_map, DMA_BIDIRECTIONAL, &areq_ctx->src.mapped_nents, (LLI_MAX_NUM_OF_ASSOC_DATA_ENTRIES + From 3f1ec97aacf19eb44162fc9c413eb4cd66f62002 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Tue, 10 Mar 2020 16:42:49 +0800 Subject: [PATCH 139/162] crypto: hisilicon/qm - Put device finding logic into QM Use struct hisi_qm to maintain the device list. Meanwhile, add two external interfaces to qm: merge finding a proper device and creating a qp into the QP allocation logic, and merge releasing qps into the QP free logic. Signed-off-by: Weili Qian Signed-off-by: Shukun Tan Reviewed-by: Zhou Wang Reviewed-by: Zaibo Xu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 125 ++++++++++++++++++++++++++++++++++ drivers/crypto/hisilicon/qm.h | 31 +++++++++ 2 files changed, 156 insertions(+) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 2fa1d1ae0984..f795fb557630 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -272,6 +272,12 @@ struct qm_doorbell { __le16 priority; }; +struct hisi_qm_resource { + struct hisi_qm *qm; + int distance; + struct list_head list; +}; + struct hisi_qm_hw_ops { int (*get_vft)(struct hisi_qm *qm, u32 *base, u32 *number); void (*qm_db)(struct hisi_qm *qm, u16 qn, @@ -2174,6 +2180,125 @@ void hisi_qm_dev_err_uninit(struct hisi_qm *qm) } EXPORT_SYMBOL_GPL(hisi_qm_dev_err_uninit); +/** + * hisi_qm_free_qps() - free multiple queue pairs. + * @qps: The queue pairs need to be freed. + * @qp_num: The num of queue pairs.
+ */ +void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num) +{ + int i; + + if (!qps || qp_num <= 0) + return; + + for (i = qp_num - 1; i >= 0; i--) + hisi_qm_release_qp(qps[i]); +} +EXPORT_SYMBOL_GPL(hisi_qm_free_qps); + +static void free_list(struct list_head *head) +{ + struct hisi_qm_resource *res, *tmp; + + list_for_each_entry_safe(res, tmp, head, list) { + list_del(&res->list); + kfree(res); + } +} + +static int hisi_qm_sort_devices(int node, struct list_head *head, + struct hisi_qm_list *qm_list) +{ + struct hisi_qm_resource *res, *tmp; + struct hisi_qm *qm; + struct list_head *n; + struct device *dev; + int dev_node = 0; + + list_for_each_entry(qm, &qm_list->list, list) { + dev = &qm->pdev->dev; + + if (IS_ENABLED(CONFIG_NUMA)) { + dev_node = dev_to_node(dev); + if (dev_node < 0) + dev_node = 0; + } + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; + + res->qm = qm; + res->distance = node_distance(dev_node, node); + n = head; + list_for_each_entry(tmp, head, list) { + if (res->distance < tmp->distance) { + n = &tmp->list; + break; + } + } + list_add_tail(&res->list, n); + } + + return 0; +} + +/** + * hisi_qm_alloc_qps_node() - Create multiple queue pairs. + * @qm_list: The list of all available devices. + * @qp_num: The number of queue pairs need created. + * @alg_type: The algorithm type. + * @node: The numa node. + * @qps: The queue pairs need created. + * + * This function will sort all available device according to numa distance. + * Then try to create all queue pairs from one device, if all devices do + * not meet the requirements will return error. + */ +int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, + u8 alg_type, int node, struct hisi_qp **qps) +{ + struct hisi_qm_resource *tmp; + int ret = -ENODEV; + LIST_HEAD(head); + int i; + + if (!qps || !qm_list || qp_num <= 0) + return -EINVAL; + + mutex_lock(&qm_list->lock); + if (hisi_qm_sort_devices(node, &head, qm_list)) { + mutex_unlock(&qm_list->lock); + goto err; + } + + list_for_each_entry(tmp, &head, list) { + for (i = 0; i < qp_num; i++) { + qps[i] = hisi_qm_create_qp(tmp->qm, alg_type); + if (IS_ERR(qps[i])) { + hisi_qm_free_qps(qps, i); + break; + } + } + + if (i == qp_num) { + ret = 0; + break; + } + } + + mutex_unlock(&qm_list->lock); + if (ret) + pr_info("Failed to create qps, node[%d], alg[%d], qp[%d]!\n", + node, alg_type, qp_num); + +err: + free_list(&head); + return ret; +} +EXPORT_SYMBOL_GPL(hisi_qm_alloc_qps_node); + static pci_ers_result_t qm_dev_err_handle(struct hisi_qm *qm) { u32 err_sts; diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h index c72c2e64d04b..ec5b6f48db6c 100644 --- a/drivers/crypto/hisilicon/qm.h +++ b/drivers/crypto/hisilicon/qm.h @@ -145,6 +145,11 @@ struct hisi_qm_err_ini { struct hisi_qm_err_info err_info; }; +struct hisi_qm_list { + struct mutex lock; + struct list_head list; +}; + struct hisi_qm { enum qm_hw_ver ver; enum qm_fun_type fun_type; @@ -156,6 +161,7 @@ struct hisi_qm { u32 qp_num; u32 qp_in_used; u32 ctrl_qp_num; + struct list_head list; struct qm_dma qdma; struct qm_sqc *sqc; @@ -228,6 +234,28 @@ struct hisi_qp { struct uacce_queue *uacce_q; }; +static inline void hisi_qm_init_list(struct hisi_qm_list *qm_list) +{ + INIT_LIST_HEAD(&qm_list->list); + mutex_init(&qm_list->lock); +} + +static inline void hisi_qm_add_to_list(struct hisi_qm *qm, + struct hisi_qm_list *qm_list) +{ + mutex_lock(&qm_list->lock); + list_add_tail(&qm->list, &qm_list->list); + mutex_unlock(&qm_list->lock); +} + +static inline 
void hisi_qm_del_from_list(struct hisi_qm *qm, + struct hisi_qm_list *qm_list) +{ + mutex_lock(&qm_list->lock); + list_del(&qm->list); + mutex_unlock(&qm_list->lock); +} + int hisi_qm_init(struct hisi_qm *qm); void hisi_qm_uninit(struct hisi_qm *qm); int hisi_qm_start(struct hisi_qm *qm); @@ -258,4 +286,7 @@ struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev, u32 count, u32 sge_nr); void hisi_acc_free_sgl_pool(struct device *dev, struct hisi_acc_sgl_pool *pool); +int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num, + u8 alg_type, int node, struct hisi_qp **qps); +void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num); #endif From 18f1ab3f74d61f773dcad257783bfbcef9ac10ea Mon Sep 17 00:00:00 2001 From: Shukun Tan Date: Tue, 10 Mar 2020 16:42:50 +0800 Subject: [PATCH 140/162] crypto: hisilicon/zip - Use hisi_qm_alloc_qps_node() when init ctx Encapsulate hisi_qm_alloc_qps_node() in a new interface to replace find_zip_device(); this fixes a bug where QP creation could fail, especially in multi-threaded scenarios. Signed-off-by: Shukun Tan Reviewed-by: Zhou Wang Reviewed-by: Zaibo Xu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/zip/zip.h | 2 +- drivers/crypto/hisilicon/zip/zip_crypto.c | 54 ++++++------- drivers/crypto/hisilicon/zip/zip_main.c | 92 ++--------------------- 3 files changed, 34 insertions(+), 114 deletions(-) diff --git a/drivers/crypto/hisilicon/zip/zip.h b/drivers/crypto/hisilicon/zip/zip.h index bc1db26598bb..82dc6f867171 100644 --- a/drivers/crypto/hisilicon/zip/zip.h +++ b/drivers/crypto/hisilicon/zip/zip.h @@ -68,7 +68,7 @@ struct hisi_zip_sqe { u32 rsvd1[4]; }; -struct hisi_zip *find_zip_device(int node); +int zip_create_qps(struct hisi_qp **qps, int ctx_num); int hisi_zip_register_to_crypto(void); void hisi_zip_unregister_from_crypto(void); #endif diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c index 9815d5e3ccd0..369ec3220574 100644 --- a/drivers/crypto/hisilicon/zip/zip_crypto.c +++ b/drivers/crypto/hisilicon/zip/zip_crypto.c @@ -132,29 +132,25 @@ static void hisi_zip_fill_sqe(struct hisi_zip_sqe *sqe, u8 req_type, sqe->dest_addr_h = upper_32_bits(d_addr); } -static int hisi_zip_create_qp(struct hisi_qm *qm, struct hisi_zip_qp_ctx *ctx, - int alg_type, int req_type) +static int hisi_zip_start_qp(struct hisi_qp *qp, struct hisi_zip_qp_ctx *ctx, + int alg_type, int req_type) { - struct hisi_qp *qp; + struct device *dev = &qp->qm->pdev->dev; int ret; - qp = hisi_qm_create_qp(qm, alg_type); - if (IS_ERR(qp)) - return PTR_ERR(qp); - qp->req_type = req_type; + qp->alg_type = alg_type; qp->qp_ctx = ctx; - ctx->qp = qp; ret = hisi_qm_start_qp(qp, 0); - if (ret < 0) - goto err_release_qp; + if (ret < 0) { + dev_err(dev, "start qp failed!\n"); + return ret; + } + + ctx->qp = qp; return 0; - -err_release_qp: - hisi_qm_release_qp(qp); - return ret; } static void hisi_zip_release_qp(struct hisi_zip_qp_ctx *ctx) @@ -165,34 +161,34 @@ static void hisi_zip_release_qp(struct hisi_zip_qp_ctx *ctx) static int hisi_zip_ctx_init(struct hisi_zip_ctx *hisi_zip_ctx, u8 req_type) { + struct hisi_qp *qps[HZIP_CTX_Q_NUM] = { NULL }; struct hisi_zip *hisi_zip; - struct hisi_qm *qm; int ret, i, j; - /* find the proper zip device */ - hisi_zip = find_zip_device(cpu_to_node(smp_processor_id())); - if (!hisi_zip) { - pr_err("Failed to find a proper ZIP device!\n"); + ret = zip_create_qps(qps, HZIP_CTX_Q_NUM); + if (ret) { + pr_err("Can not create zip qps!\n"); return -ENODEV; } - qm = &hisi_zip->qm; + + hisi_zip = 
container_of(qps[0]->qm, struct hisi_zip, qm); for (i = 0; i < HZIP_CTX_Q_NUM; i++) { /* alg_type = 0 for compress, 1 for decompress in hw sqe */ - ret = hisi_zip_create_qp(qm, &hisi_zip_ctx->qp_ctx[i], i, - req_type); - if (ret) - goto err; + ret = hisi_zip_start_qp(qps[i], &hisi_zip_ctx->qp_ctx[i], i, + req_type); + if (ret) { + for (j = i - 1; j >= 0; j--) + hisi_qm_stop_qp(hisi_zip_ctx->qp_ctx[j].qp); + + hisi_qm_free_qps(qps, HZIP_CTX_Q_NUM); + return ret; + } hisi_zip_ctx->qp_ctx[i].zip_dev = hisi_zip; } return 0; -err: - for (j = i - 1; j >= 0; j--) - hisi_zip_release_qp(&hisi_zip_ctx->qp_ctx[j]); - - return ret; } static void hisi_zip_ctx_exit(struct hisi_zip_ctx *hisi_zip_ctx) diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 25a311248fb8..fcc85d2dbd07 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -88,77 +88,7 @@ static const char hisi_zip_name[] = "hisi_zip"; static struct dentry *hzip_debugfs_root; -static LIST_HEAD(hisi_zip_list); -static DEFINE_MUTEX(hisi_zip_list_lock); - -struct hisi_zip_resource { - struct hisi_zip *hzip; - int distance; - struct list_head list; -}; - -static void free_list(struct list_head *head) -{ - struct hisi_zip_resource *res, *tmp; - - list_for_each_entry_safe(res, tmp, head, list) { - list_del(&res->list); - kfree(res); - } -} - -struct hisi_zip *find_zip_device(int node) -{ - struct hisi_zip_resource *res, *tmp; - struct hisi_zip *ret = NULL; - struct hisi_zip *hisi_zip; - struct list_head *n; - struct device *dev; - LIST_HEAD(head); - - mutex_lock(&hisi_zip_list_lock); - - if (IS_ENABLED(CONFIG_NUMA)) { - list_for_each_entry(hisi_zip, &hisi_zip_list, list) { - res = kzalloc(sizeof(*res), GFP_KERNEL); - if (!res) - goto err; - - dev = &hisi_zip->qm.pdev->dev; - res->hzip = hisi_zip; - res->distance = node_distance(dev_to_node(dev), node); - - n = &head; - list_for_each_entry(tmp, &head, list) { - if (res->distance < tmp->distance) { - n = &tmp->list; - break; - } - } - list_add_tail(&res->list, n); - } - - list_for_each_entry(tmp, &head, list) { - if (hisi_qm_get_free_qp_num(&tmp->hzip->qm)) { - ret = tmp->hzip; - break; - } - } - - free_list(&head); - } else { - ret = list_first_entry(&hisi_zip_list, struct hisi_zip, list); - } - - mutex_unlock(&hisi_zip_list_lock); - - return ret; - -err: - free_list(&head); - mutex_unlock(&hisi_zip_list_lock); - return NULL; -} +static struct hisi_qm_list zip_devices; struct hisi_zip_hw_error { u32 int_msk; @@ -313,18 +243,11 @@ static const struct pci_device_id hisi_zip_dev_ids[] = { }; MODULE_DEVICE_TABLE(pci, hisi_zip_dev_ids); -static inline void hisi_zip_add_to_list(struct hisi_zip *hisi_zip) +int zip_create_qps(struct hisi_qp **qps, int qp_num) { - mutex_lock(&hisi_zip_list_lock); - list_add_tail(&hisi_zip->list, &hisi_zip_list); - mutex_unlock(&hisi_zip_list_lock); -} + int node = cpu_to_node(smp_processor_id()); -static inline void hisi_zip_remove_from_list(struct hisi_zip *hisi_zip) -{ - mutex_lock(&hisi_zip_list_lock); - list_del(&hisi_zip->list); - mutex_unlock(&hisi_zip_list_lock); + return hisi_qm_alloc_qps_node(&zip_devices, qp_num, 0, node, qps); } static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip) @@ -891,7 +814,7 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) dev_err(&pdev->dev, "Failed to init debugfs (%d)!\n", ret); - hisi_zip_add_to_list(hisi_zip); + hisi_qm_add_to_list(qm, &zip_devices); if (qm->uacce) { ret = 
uacce_register(qm->uacce); @@ -908,7 +831,7 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_remove_from_list: - hisi_zip_remove_from_list(hisi_zip); + hisi_qm_del_from_list(qm, &zip_devices); hisi_zip_debugfs_exit(hisi_zip); hisi_qm_stop(qm); err_qm_uninit: @@ -937,7 +860,7 @@ static void hisi_zip_remove(struct pci_dev *pdev) hisi_qm_dev_err_uninit(qm); hisi_qm_uninit(qm); - hisi_zip_remove_from_list(hisi_zip); + hisi_qm_del_from_list(qm, &zip_devices); } static const struct pci_error_handlers hisi_zip_err_handler = { @@ -971,6 +894,7 @@ static int __init hisi_zip_init(void) { int ret; + hisi_qm_init_list(&zip_devices); hisi_zip_register_debugfs(); ret = pci_register_driver(&hisi_zip_pci_driver); From 0f4146fadc580b7b479aa46a9991ed451ed911eb Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Tue, 10 Mar 2020 16:42:51 +0800 Subject: [PATCH 141/162] crypto: hisilicon/hpre - Optimize finding hpre device process Optimize the process of finding an hpre device by selecting devices in order of NUMA distance priority. Signed-off-by: Hui Tang Signed-off-by: Shukun Tan Reviewed-by: Zhou Wang Reviewed-by: Zaibo Xu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre.h | 3 +- drivers/crypto/hisilicon/hpre/hpre_crypto.c | 20 +++----- drivers/crypto/hisilicon/hpre/hpre_main.c | 52 ++++++--------------- 3 files changed, 20 insertions(+), 55 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre.h b/drivers/crypto/hisilicon/hpre/hpre.h index ddf13ea9862a..03d512ec6336 100644 --- a/drivers/crypto/hisilicon/hpre/hpre.h +++ b/drivers/crypto/hisilicon/hpre/hpre.h @@ -46,7 +46,6 @@ struct hpre_debug { struct hpre { struct hisi_qm qm; - struct list_head list; struct hpre_debug debug; u32 num_vfs; unsigned long status; @@ -76,7 +75,7 @@ struct hpre_sqe { __le32 rsvd1[_HPRE_SQE_ALIGN_EXT]; }; -struct hpre *hpre_find_device(int node); +struct hisi_qp *hpre_create_qp(void); int hpre_algs_register(void); void hpre_algs_unregister(void); diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index 5d400d69e8e4..65425250b2e9 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -147,26 +147,18 @@ static void hpre_rm_req_from_ctx(struct hpre_asym_request *hpre_req) static struct hisi_qp *hpre_get_qp_and_start(void) { struct hisi_qp *qp; - struct hpre *hpre; int ret; - /* find the proper hpre device, which is near the current CPU core */ - hpre = hpre_find_device(cpu_to_node(smp_processor_id())); - if (!hpre) { - pr_err("Can not find proper hpre device!\n"); - return ERR_PTR(-ENODEV); - } - - qp = hisi_qm_create_qp(&hpre->qm, 0); - if (IS_ERR(qp)) { - pci_err(hpre->qm.pdev, "Can not create qp!\n"); + qp = hpre_create_qp(); + if (!qp) { + pr_err("Can not create hpre qp!\n"); return ERR_PTR(-ENODEV); } ret = hisi_qm_start_qp(qp, 0); if (ret < 0) { - hisi_qm_release_qp(qp); - pci_err(hpre->qm.pdev, "Can not start qp!\n"); + hisi_qm_free_qps(&qp, 1); + pci_err(qp->qm->pdev, "Can not start qp!\n"); return ERR_PTR(-EINVAL); } @@ -338,7 +330,7 @@ static void hpre_ctx_clear(struct hpre_ctx *ctx, bool is_clear_all) if (is_clear_all) { idr_destroy(&ctx->req_idr); kfree(ctx->req_list); - hisi_qm_release_qp(ctx->qp); + hisi_qm_free_qps(&ctx->qp, 1); } ctx->crt_g2_mode = false; diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 0ba4a92d4618..88be53bf4a38 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++
b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -82,8 +82,7 @@ #define HPRE_VIA_MSI_DSM 1 -static LIST_HEAD(hpre_list); -static DEFINE_MUTEX(hpre_list_lock); +static struct hisi_qm_list hpre_devices; static const char hpre_name[] = "hisi_hpre"; static struct dentry *hpre_debugfs_root; static const struct pci_device_id hpre_dev_ids[] = { @@ -196,43 +195,17 @@ static u32 hpre_pf_q_num = HPRE_PF_DEF_Q_NUM; module_param_cb(hpre_pf_q_num, &hpre_pf_q_num_ops, &hpre_pf_q_num, 0444); MODULE_PARM_DESC(hpre_pf_q_num, "Number of queues in PF of CS(1-1024)"); -static inline void hpre_add_to_list(struct hpre *hpre) +struct hisi_qp *hpre_create_qp(void) { - mutex_lock(&hpre_list_lock); - list_add_tail(&hpre->list, &hpre_list); - mutex_unlock(&hpre_list_lock); -} + int node = cpu_to_node(smp_processor_id()); + struct hisi_qp *qp = NULL; + int ret; -static inline void hpre_remove_from_list(struct hpre *hpre) -{ - mutex_lock(&hpre_list_lock); - list_del(&hpre->list); - mutex_unlock(&hpre_list_lock); -} + ret = hisi_qm_alloc_qps_node(&hpre_devices, 1, 0, node, &qp); + if (!ret) + return qp; -struct hpre *hpre_find_device(int node) -{ - struct hpre *hpre, *ret = NULL; - int min_distance = INT_MAX; - struct device *dev; - int dev_node = 0; - - mutex_lock(&hpre_list_lock); - list_for_each_entry(hpre, &hpre_list, list) { - dev = &hpre->qm.pdev->dev; -#ifdef CONFIG_NUMA - dev_node = dev->numa_node; - if (dev_node < 0) - dev_node = 0; -#endif - if (node_distance(dev_node, node) < min_distance) { - ret = hpre; - min_distance = node_distance(dev_node, node); - } - } - mutex_unlock(&hpre_list_lock); - - return ret; + return NULL; } static int hpre_cfg_by_dsm(struct hisi_qm *qm) @@ -799,17 +772,17 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) dev_warn(&pdev->dev, "init debugfs fail!\n"); - hpre_add_to_list(hpre); + hisi_qm_add_to_list(qm, &hpre_devices); ret = hpre_algs_register(); if (ret < 0) { - hpre_remove_from_list(hpre); pci_err(pdev, "fail to register algs to crypto!\n"); goto err_with_qm_start; } return 0; err_with_qm_start: + hisi_qm_del_from_list(qm, &hpre_devices); hisi_qm_stop(qm); err_with_err_init: @@ -929,7 +902,7 @@ static void hpre_remove(struct pci_dev *pdev) int ret; hpre_algs_unregister(); - hpre_remove_from_list(hpre); + hisi_qm_del_from_list(qm, &hpre_devices); if (qm->fun_type == QM_HW_PF && hpre->num_vfs != 0) { ret = hpre_sriov_disable(pdev); if (ret) { @@ -979,6 +952,7 @@ static int __init hpre_init(void) { int ret; + hisi_qm_init_list(&hpre_devices); hpre_register_debugfs(); ret = pci_register_driver(&hpre_pci_driver); From 0b5e43bc1258bb400378fed99beae032dbece045 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Tue, 10 Mar 2020 16:42:52 +0800 Subject: [PATCH 142/162] crypto: hisilicon/sec2 - Add new create qp process Combine finding a device and creating a QP into one operation instead of performing them as two independent steps. When multiple tasks run concurrently, different threads may find the same device at the same time even though that device no longer has enough free queues, so some threads fail to create a QP. Fix this by finding a device and creating the QP in a single pass: if creation fails, the current thread moves on to the next device.
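As an illustrative sketch of the new combined step (condensed from the sec_create_qps() helper added below; error handling trimmed, so treat it as an outline rather than the literal driver code):

	int node = cpu_to_node(smp_processor_id());
	struct hisi_qp **qps;
	int ret;

	qps = kcalloc(ctx_q_num, sizeof(struct hisi_qp *), GFP_KERNEL);
	if (!qps)
		return NULL;

	/* One call both picks a nearby device and reserves all ctx_q_num
	 * queues on it; if a device comes up short, the helper moves on to
	 * the next device, closing the window between device lookup and
	 * QP creation. */
	ret = hisi_qm_alloc_qps_node(&sec_devices, ctx_q_num, 0, node, qps);
	if (!ret)
		return qps;

	kfree(qps);
	return NULL;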
Signed-off-by: Kai Ye Signed-off-by: Shukun Tan Reviewed-by: Zhou Wang Reviewed-by: Zaibo Xu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec.h | 5 +- drivers/crypto/hisilicon/sec2/sec_crypto.c | 17 +++-- drivers/crypto/hisilicon/sec2/sec_main.c | 81 +++++++++------------- 3 files changed, 42 insertions(+), 61 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index a73d82c9e180..3598fa17beb2 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -119,6 +119,7 @@ struct sec_ctx { struct sec_qp_ctx *qp_ctx; struct sec_dev *sec; const struct sec_req_op *req_op; + struct hisi_qp **qps; /* Half queues for encipher, and half for decipher */ u32 hlf_q_num; @@ -168,7 +169,6 @@ struct sec_debug { struct sec_dev { struct hisi_qm qm; - struct list_head list; struct sec_debug debug; u32 ctx_q_num; bool iommu_used; @@ -176,7 +176,8 @@ struct sec_dev { unsigned long status; }; -struct sec_dev *sec_find_device(int node); +void sec_destroy_qps(struct hisi_qp **qps, int qp_num); +struct hisi_qp **sec_create_qps(void); int sec_register_to_crypto(void); void sec_unregister_from_crypto(void); #endif diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index b2557431feb4..7f1c6a31b82f 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -355,11 +355,8 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx, struct hisi_qp *qp; int ret = -ENOMEM; - qp = hisi_qm_create_qp(qm, alg_type); - if (IS_ERR(qp)) - return PTR_ERR(qp); - qp_ctx = &ctx->qp_ctx[qp_ctx_id]; + qp = ctx->qps[qp_ctx_id]; qp->req_type = 0; qp->qp_ctx = qp_ctx; qp->req_cb = sec_req_cb; @@ -402,7 +399,6 @@ err_free_c_in_pool: hisi_acc_free_sgl_pool(dev, qp_ctx->c_in_pool); err_destroy_idr: idr_destroy(&qp_ctx->req_idr); - hisi_qm_release_qp(qp); return ret; } @@ -419,7 +415,6 @@ static void sec_release_qp_ctx(struct sec_ctx *ctx, hisi_acc_free_sgl_pool(dev, qp_ctx->c_in_pool); idr_destroy(&qp_ctx->req_idr); - hisi_qm_release_qp(qp_ctx->qp); } static int sec_ctx_base_init(struct sec_ctx *ctx) @@ -427,11 +422,13 @@ static int sec_ctx_base_init(struct sec_ctx *ctx) struct sec_dev *sec; int i, ret; - sec = sec_find_device(cpu_to_node(smp_processor_id())); - if (!sec) { - pr_err("Can not find proper Hisilicon SEC device!\n"); + ctx->qps = sec_create_qps(); + if (!ctx->qps) { + pr_err("Can not create sec qps!\n"); return -ENODEV; } + + sec = container_of(ctx->qps[0]->qm, struct sec_dev, qm); ctx->sec = sec; ctx->hlf_q_num = sec->ctx_q_num >> 1; @@ -455,6 +452,7 @@ err_sec_release_qp_ctx: for (i = i - 1; i >= 0; i--) sec_release_qp_ctx(ctx, &ctx->qp_ctx[i]); + sec_destroy_qps(ctx->qps, sec->ctx_q_num); kfree(ctx->qp_ctx); return ret; } @@ -466,6 +464,7 @@ static void sec_ctx_base_uninit(struct sec_ctx *ctx) for (i = 0; i < ctx->sec->ctx_q_num; i++) sec_release_qp_ctx(ctx, &ctx->qp_ctx[i]); + sec_destroy_qps(ctx->qps, ctx->sec->ctx_q_num); kfree(ctx->qp_ctx); } diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 4f354d79fa7e..1f54ebe164b6 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -90,8 +90,7 @@ struct sec_hw_error { static const char sec_name[] = "hisi_sec2"; static struct dentry *sec_debugfs_root; -static LIST_HEAD(sec_list); -static DEFINE_MUTEX(sec_list_lock); +static struct hisi_qm_list sec_devices; static const struct 
sec_hw_error sec_hw_errors[] = { {.int_msk = BIT(0), .msg = "sec_axi_rresp_err_rint"}, @@ -106,37 +105,6 @@ static const struct sec_hw_error sec_hw_errors[] = { { /* sentinel */ } }; -struct sec_dev *sec_find_device(int node) -{ -#define SEC_NUMA_MAX_DISTANCE 100 - int min_distance = SEC_NUMA_MAX_DISTANCE; - int dev_node = 0, free_qp_num = 0; - struct sec_dev *sec, *ret = NULL; - struct hisi_qm *qm; - struct device *dev; - - mutex_lock(&sec_list_lock); - list_for_each_entry(sec, &sec_list, list) { - qm = &sec->qm; - dev = &qm->pdev->dev; -#ifdef CONFIG_NUMA - dev_node = dev->numa_node; - if (dev_node < 0) - dev_node = 0; -#endif - if (node_distance(dev_node, node) < min_distance) { - free_qp_num = hisi_qm_get_free_qp_num(qm); - if (free_qp_num >= sec->ctx_q_num) { - ret = sec; - min_distance = node_distance(dev_node, node); - } - } - } - mutex_unlock(&sec_list_lock); - - return ret; -} - static const char * const sec_dbg_file_name[] = { [SEC_CURRENT_QM] = "current_qm", [SEC_CLEAR_ENABLE] = "clear_enable", @@ -239,6 +207,32 @@ static u32 ctx_q_num = SEC_CTX_Q_NUM_DEF; module_param_cb(ctx_q_num, &sec_ctx_q_num_ops, &ctx_q_num, 0444); MODULE_PARM_DESC(ctx_q_num, "Queue num in ctx (24 default, 2, 4, ..., 32)"); +void sec_destroy_qps(struct hisi_qp **qps, int qp_num) +{ + hisi_qm_free_qps(qps, qp_num); + kfree(qps); +} + +struct hisi_qp **sec_create_qps(void) +{ + int node = cpu_to_node(smp_processor_id()); + u32 ctx_num = ctx_q_num; + struct hisi_qp **qps; + int ret; + + qps = kcalloc(ctx_num, sizeof(struct hisi_qp *), GFP_KERNEL); + if (!qps) + return NULL; + + ret = hisi_qm_alloc_qps_node(&sec_devices, ctx_num, 0, node, qps); + if (!ret) + return qps; + + kfree(qps); + return NULL; +} + + static const struct pci_device_id sec_dev_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_PF_PCI_DEVICE_ID) }, { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, SEC_VF_PCI_DEVICE_ID) }, @@ -246,20 +240,6 @@ static const struct pci_device_id sec_dev_ids[] = { }; MODULE_DEVICE_TABLE(pci, sec_dev_ids); -static inline void sec_add_to_list(struct sec_dev *sec) -{ - mutex_lock(&sec_list_lock); - list_add_tail(&sec->list, &sec_list); - mutex_unlock(&sec_list_lock); -} - -static inline void sec_remove_from_list(struct sec_dev *sec) -{ - mutex_lock(&sec_list_lock); - list_del(&sec->list); - mutex_unlock(&sec_list_lock); -} - static u8 sec_get_endian(struct sec_dev *sec) { struct hisi_qm *qm = &sec->qm; @@ -889,7 +869,7 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) pci_warn(pdev, "Failed to init debugfs!\n"); - sec_add_to_list(sec); + hisi_qm_add_to_list(qm, &sec_devices); ret = sec_register_to_crypto(); if (ret < 0) { @@ -900,7 +880,7 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_remove_from_list: - sec_remove_from_list(sec); + hisi_qm_del_from_list(qm, &sec_devices); sec_debugfs_exit(sec); hisi_qm_stop(qm); @@ -1024,7 +1004,7 @@ static void sec_remove(struct pci_dev *pdev) sec_unregister_from_crypto(); - sec_remove_from_list(sec); + hisi_qm_del_from_list(qm, &sec_devices); if (qm->fun_type == QM_HW_PF && sec->num_vfs) (void)sec_sriov_disable(pdev); @@ -1071,6 +1051,7 @@ static int __init sec_init(void) { int ret; + hisi_qm_init_list(&sec_devices); sec_register_debugfs(); ret = pci_register_driver(&sec_pci_driver); From 2638268fa9ebe4f13a8aee89044719f2084f80c8 Mon Sep 17 00:00:00 2001 From: Jianhui Zhao Date: Tue, 10 Mar 2020 20:25:51 +0800 Subject: [PATCH 143/162] crypto: atmel-i2c - Fix wakeup fail The wake token cannot be sent without 
ignoring the nack for the device address. Signed-off-by: Jianhui Zhao Signed-off-by: Herbert Xu --- drivers/crypto/atmel-i2c.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c index 1d3355913b40..e8e8281e027d 100644 --- a/drivers/crypto/atmel-i2c.c +++ b/drivers/crypto/atmel-i2c.c @@ -176,7 +176,8 @@ static int atmel_i2c_wakeup(struct i2c_client *client) * device is idle, asleep or during waking up. Don't check for error * when waking up the device. */ - i2c_master_send(client, i2c_priv->wake_token, i2c_priv->wake_token_sz); + i2c_transfer_buffer_flags(client, i2c_priv->wake_token, + i2c_priv->wake_token_sz, I2C_M_IGNORE_NAK); /* * Wait to wake the device. Typical execution times for ecdh and genkey From 7fe8e483ec6f62ad43e9c2ba7e07b047f4d35d69 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 11 Mar 2020 08:15:06 +0100 Subject: [PATCH 144/162] crypto: bcm - Use scnprintf() for avoiding potential buffer overflow Since snprintf() returns the would-be-output size instead of the actual output size, the succeeding calls may go beyond the given buffer limit. Fix it by replacing with scnprintf(). Signed-off-by: Takashi Iwai Signed-off-by: Herbert Xu --- drivers/crypto/bcm/util.c | 40 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/crypto/bcm/util.c b/drivers/crypto/bcm/util.c index cd7504101acd..2b304fc78059 100644 --- a/drivers/crypto/bcm/util.c +++ b/drivers/crypto/bcm/util.c @@ -366,88 +366,88 @@ static ssize_t spu_debugfs_read(struct file *filp, char __user *ubuf, ipriv = filp->private_data; out_offset = 0; - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Number of SPUs.........%u\n", ipriv->spu.num_spu); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Current sessions.......%u\n", atomic_read(&ipriv->session_count)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Session count..........%u\n", atomic_read(&ipriv->stream_count)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Cipher setkey..........%u\n", atomic_read(&ipriv->setkey_cnt[SPU_OP_CIPHER])); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Cipher Ops.............%u\n", atomic_read(&ipriv->op_counts[SPU_OP_CIPHER])); for (alg = 0; alg < CIPHER_ALG_LAST; alg++) { for (mode = 0; mode < CIPHER_MODE_LAST; mode++) { op_cnt = atomic_read(&ipriv->cipher_cnt[alg][mode]); if (op_cnt) { - out_offset += snprintf(buf + out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, " %-13s%11u\n", spu_alg_name(alg, mode), op_cnt); } } } - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Hash Ops...............%u\n", atomic_read(&ipriv->op_counts[SPU_OP_HASH])); for (alg = 0; alg < HASH_ALG_LAST; alg++) { op_cnt = atomic_read(&ipriv->hash_cnt[alg]); if (op_cnt) { - out_offset += snprintf(buf + out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, " %-13s%11u\n", hash_alg_name[alg], op_cnt); } } - out_offset += snprintf(buf + out_offset, out_count -
out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "HMAC setkey............%u\n", atomic_read(&ipriv->setkey_cnt[SPU_OP_HMAC])); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "HMAC Ops...............%u\n", atomic_read(&ipriv->op_counts[SPU_OP_HMAC])); for (alg = 0; alg < HASH_ALG_LAST; alg++) { op_cnt = atomic_read(&ipriv->hmac_cnt[alg]); if (op_cnt) { - out_offset += snprintf(buf + out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, " %-13s%11u\n", hash_alg_name[alg], op_cnt); } } - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "AEAD setkey............%u\n", atomic_read(&ipriv->setkey_cnt[SPU_OP_AEAD])); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "AEAD Ops...............%u\n", atomic_read(&ipriv->op_counts[SPU_OP_AEAD])); for (alg = 0; alg < AEAD_TYPE_LAST; alg++) { op_cnt = atomic_read(&ipriv->aead_cnt[alg]); if (op_cnt) { - out_offset += snprintf(buf + out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, " %-13s%11u\n", aead_alg_name[alg], op_cnt); } } - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Bytes of req data......%llu\n", (u64)atomic64_read(&ipriv->bytes_out)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Bytes of resp data.....%llu\n", (u64)atomic64_read(&ipriv->bytes_in)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Mailbox full...........%u\n", atomic_read(&ipriv->mb_no_spc)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Mailbox send failures..%u\n", atomic_read(&ipriv->mb_send_fail)); - out_offset += snprintf(buf + out_offset, out_count - out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "Check ICV errors.......%u\n", atomic_read(&ipriv->bad_icv)); if (ipriv->spu.spu_type == SPU_TYPE_SPUM) @@ -455,7 +455,7 @@ static ssize_t spu_debugfs_read(struct file *filp, char __user *ubuf, spu_ofifo_ctrl = ioread32(ipriv->spu.reg_vbase[i] + SPU_OFIFO_CTRL); fifo_len = spu_ofifo_ctrl & SPU_FIFO_WATERMARK; - out_offset += snprintf(buf + out_offset, + out_offset += scnprintf(buf + out_offset, out_count - out_offset, "SPU %d output FIFO high water.....%u\n", i, fifo_len); From 82ff493eb77cfd28fbfea6ce29bdaaffe238c0b2 Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Fri, 13 Mar 2020 12:02:58 +0100 Subject: [PATCH 145/162] crypto: arm/neon - memzero_explicit aes-cbc key At function exit, do not leave the expanded key in the rk struct which got allocated on the stack. 
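The pattern, as a minimal sketch (illustrative only; example_setkey() and use_expanded_key() are hypothetical stand-ins, not the driver's functions; needs <crypto/aes.h> and <linux/string.h>):

	/* Key material expanded into an on-stack struct must be wiped
	 * before returning.  A plain memset() at function exit can be
	 * elided by the compiler as a dead store; memzero_explicit()
	 * is guaranteed to survive optimization. */
	static int example_setkey(const u8 *in_key, unsigned int key_len)
	{
		struct crypto_aes_ctx rk;	/* stack copy of the expanded key */
		int err;

		err = aes_expandkey(&rk, in_key, key_len);
		if (!err)
			use_expanded_key(&rk);	/* hypothetical consumer */

		memzero_explicit(&rk, sizeof(rk));	/* wipe the stack copy */
		return err;
	}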
Signed-off-by: Torsten Duwe Acked-by: Will Deacon Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-neonbs-glue.c | 1 + arch/arm64/crypto/aes-neonbs-glue.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c index e85839a8aaeb..e6fd32919c81 100644 --- a/arch/arm/crypto/aes-neonbs-glue.c +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -138,6 +138,7 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key, kernel_neon_begin(); aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds); kernel_neon_end(); + memzero_explicit(&rk, sizeof(rk)); return crypto_cipher_setkey(ctx->enc_tfm, in_key, key_len); } diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index e3e27349a9fe..fb507d569922 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -151,6 +151,7 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key, kernel_neon_begin(); aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds); kernel_neon_end(); + memzero_explicit(&rk, sizeof(rk)); return 0; } From 655ff1a1a727f9b83df317c4ad3b2f4a48f6206c Mon Sep 17 00:00:00 2001 From: SrujanaChalla Date: Fri, 13 Mar 2020 17:17:05 +0530 Subject: [PATCH 146/162] crypto: marvell - create common Kconfig and Makefile for Marvell Creates common Kconfig and Makefile for Marvell crypto drivers. Signed-off-by: SrujanaChalla Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 15 +-------- drivers/crypto/Makefile | 2 +- drivers/crypto/marvell/Kconfig | 21 ++++++++++++ drivers/crypto/marvell/Makefile | 6 ++-- drivers/crypto/marvell/cesa/Makefile | 3 ++ drivers/crypto/marvell/{ => cesa}/cesa.c | 0 drivers/crypto/marvell/{ => cesa}/cesa.h | 5 +-- drivers/crypto/marvell/{ => cesa}/cipher.c | 15 +++++---- drivers/crypto/marvell/{ => cesa}/hash.c | 38 +++++++++++++--------- drivers/crypto/marvell/{ => cesa}/tdma.c | 10 +++--- 10 files changed, 69 insertions(+), 46 deletions(-) create mode 100644 drivers/crypto/marvell/Kconfig create mode 100644 drivers/crypto/marvell/cesa/Makefile rename drivers/crypto/marvell/{ => cesa}/cesa.c (100%) rename drivers/crypto/marvell/{ => cesa}/cesa.h (99%) rename drivers/crypto/marvell/{ => cesa}/cipher.c (98%) rename drivers/crypto/marvell/{ => cesa}/hash.c (98%) rename drivers/crypto/marvell/{ => cesa}/tdma.c (97%) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index f81d4cf86143..2c887e4d005a 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -233,20 +233,6 @@ config CRYPTO_CRC32_S390 It is available with IBM z13 or later. -config CRYPTO_DEV_MARVELL_CESA - tristate "Marvell's Cryptographic Engine driver" - depends on PLAT_ORION || ARCH_MVEBU - select CRYPTO_LIB_AES - select CRYPTO_LIB_DES - select CRYPTO_SKCIPHER - select CRYPTO_HASH - select SRAM - help - This driver allows you to utilize the Cryptographic Engines and - Security Accelerator (CESA) which can be found on MVEBU and ORION - platforms. - This driver supports CPU offload through DMA transfers.
- config CRYPTO_DEV_NIAGARA2 tristate "Niagara2 Stream Processing Unit driver" select CRYPTO_LIB_DES @@ -606,6 +592,7 @@ config CRYPTO_DEV_MXS_DCP source "drivers/crypto/qat/Kconfig" source "drivers/crypto/cavium/cpt/Kconfig" source "drivers/crypto/cavium/nitrox/Kconfig" +source "drivers/crypto/marvell/Kconfig" config CRYPTO_DEV_CAVIUM_ZIP tristate "Cavium ZIP driver" diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index d505d16ec00e..944ed7226e37 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -18,7 +18,7 @@ obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o -obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell/ +obj-$(CONFIG_CRYPTO_DEV_MARVELL) += marvell/ obj-$(CONFIG_CRYPTO_DEV_MEDIATEK) += mediatek/ obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o diff --git a/drivers/crypto/marvell/Kconfig b/drivers/crypto/marvell/Kconfig new file mode 100644 index 000000000000..8262b14c6287 --- /dev/null +++ b/drivers/crypto/marvell/Kconfig @@ -0,0 +1,21 @@ +# +# Marvell crypto drivers configuration +# + +config CRYPTO_DEV_MARVELL + tristate + +config CRYPTO_DEV_MARVELL_CESA + tristate "Marvell's Cryptographic Engine driver" + depends on PLAT_ORION || ARCH_MVEBU + select CRYPTO_LIB_AES + select CRYPTO_LIB_DES + select CRYPTO_SKCIPHER + select CRYPTO_HASH + select SRAM + select CRYPTO_DEV_MARVELL + help + This driver allows you to utilize the Cryptographic Engines and + Security Accelerator (CESA) which can be found on MVEBU and ORION + platforms. + This driver supports CPU offload through DMA transfers. diff --git a/drivers/crypto/marvell/Makefile b/drivers/crypto/marvell/Makefile index b27cab65e696..2030b0b55e91 100644 --- a/drivers/crypto/marvell/Makefile +++ b/drivers/crypto/marvell/Makefile @@ -1,3 +1,3 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell-cesa.o -marvell-cesa-objs := cesa.o cipher.o hash.o tdma.o +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += cesa/ diff --git a/drivers/crypto/marvell/cesa/Makefile b/drivers/crypto/marvell/cesa/Makefile new file mode 100644 index 000000000000..b27cab65e696 --- /dev/null +++ b/drivers/crypto/marvell/cesa/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell-cesa.o +marvell-cesa-objs := cesa.o cipher.o hash.o tdma.o diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa/cesa.c similarity index 100% rename from drivers/crypto/marvell/cesa.c rename to drivers/crypto/marvell/cesa/cesa.c diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa/cesa.h similarity index 99% rename from drivers/crypto/marvell/cesa.h rename to drivers/crypto/marvell/cesa/cesa.h index f1ed3b85c0d2..e8632d5f343f 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa/cesa.h @@ -436,7 +436,7 @@ struct mv_cesa_dev { * @queue: fifo of the pending crypto requests * @load: engine load counter, useful for load balancing * @chain: list of the current tdma descriptors being processed - * by this engine. + * by this engine. * @complete_queue: fifo of the processed requests by the engine * * Structure storing CESA engine information. 
@@ -467,7 +467,7 @@ struct mv_cesa_engine { * @step: launch the crypto operation on the next chunk * @cleanup: cleanup the crypto request (release associated data) * @complete: complete the request, i.e copy result or context from sram when - * needed. + * needed. */ struct mv_cesa_req_ops { int (*process)(struct crypto_async_request *req, u32 status); @@ -734,6 +734,7 @@ static inline struct mv_cesa_engine *mv_cesa_select_engine(int weight) for (i = 0; i < cesa_dev->caps->nengines; i++) { struct mv_cesa_engine *engine = cesa_dev->engines + i; u32 load = atomic_read(&engine->load); + if (load < min_load) { min_load = load; selected = engine; diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cesa/cipher.c similarity index 98% rename from drivers/crypto/marvell/cipher.c rename to drivers/crypto/marvell/cesa/cipher.c index c24f34a48cef..f133c2ccb5ae 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cesa/cipher.c @@ -106,8 +106,8 @@ static void mv_cesa_skcipher_std_step(struct skcipher_request *req) mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); - BUG_ON(readl(engine->regs + CESA_SA_CMD) & - CESA_SA_CMD_EN_CESA_SA_ACCL0); + WARN_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } @@ -178,6 +178,7 @@ static inline void mv_cesa_skcipher_prepare(struct crypto_async_request *req, { struct skcipher_request *skreq = skcipher_request_cast(req); struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(skreq); + creq->base.engine = engine; if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) @@ -336,7 +337,8 @@ static int mv_cesa_skcipher_dma_req_init(struct skcipher_request *req, do { struct mv_cesa_op_ctx *op; - op = mv_cesa_dma_add_op(&basereq->chain, op_templ, skip_ctx, flags); + op = mv_cesa_dma_add_op(&basereq->chain, op_templ, skip_ctx, + flags); if (IS_ERR(op)) { ret = PTR_ERR(op); goto err_free_tdma; @@ -365,9 +367,10 @@ static int mv_cesa_skcipher_dma_req_init(struct skcipher_request *req, } while (mv_cesa_skcipher_req_iter_next_op(&iter)); /* Add output data for IV */ - ret = mv_cesa_dma_add_result_op(&basereq->chain, CESA_SA_CFG_SRAM_OFFSET, - CESA_SA_DATA_SRAM_OFFSET, - CESA_TDMA_SRC_IN_SRAM, flags); + ret = mv_cesa_dma_add_result_op(&basereq->chain, + CESA_SA_CFG_SRAM_OFFSET, + CESA_SA_DATA_SRAM_OFFSET, + CESA_TDMA_SRC_IN_SRAM, flags); if (ret) goto err_free_tdma; diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/cesa/hash.c similarity index 98% rename from drivers/crypto/marvell/hash.c rename to drivers/crypto/marvell/cesa/hash.c index a2b35fb0fb89..b971284332b6 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/cesa/hash.c @@ -141,9 +141,11 @@ static int mv_cesa_ahash_pad_req(struct mv_cesa_ahash_req *creq, u8 *buf) if (creq->algo_le) { __le64 bits = cpu_to_le64(creq->len << 3); + memcpy(buf + padlen, &bits, sizeof(bits)); } else { __be64 bits = cpu_to_be64(creq->len << 3); + memcpy(buf + padlen, &bits, sizeof(bits)); } @@ -168,7 +170,8 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) if (!sreq->offset) { digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(req)); for (i = 0; i < digsize / 4; i++) - writel_relaxed(creq->state[i], engine->regs + CESA_IVDIG(i)); + writel_relaxed(creq->state[i], + engine->regs + CESA_IVDIG(i)); } if (creq->cache_ptr) @@ -245,8 +248,8 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) 
mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); - BUG_ON(readl(engine->regs + CESA_SA_CMD) & - CESA_SA_CMD_EN_CESA_SA_ACCL0); + WARN_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } @@ -329,11 +332,12 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req) digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq)); if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ && - (creq->base.chain.last->flags & CESA_TDMA_TYPE_MSK) == CESA_TDMA_RESULT) { + (creq->base.chain.last->flags & CESA_TDMA_TYPE_MSK) == + CESA_TDMA_RESULT) { __le32 *data = NULL; /* - * Result is already in the correct endianess when the SA is + * Result is already in the correct endianness when the SA is * used */ data = creq->base.chain.last->op->ctx.hash.hash; @@ -347,9 +351,9 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req) CESA_IVDIG(i)); if (creq->last_req) { /* - * Hardware's MD5 digest is in little endian format, but - * SHA in big endian format - */ + * Hardware's MD5 digest is in little endian format, but + * SHA in big endian format + */ if (creq->algo_le) { __le32 *result = (void *)ahashreq->result; @@ -439,7 +443,8 @@ static bool mv_cesa_ahash_cache_req(struct ahash_request *req) struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); bool cached = false; - if (creq->cache_ptr + req->nbytes < CESA_MAX_HASH_BLOCK_SIZE && !creq->last_req) { + if (creq->cache_ptr + req->nbytes < CESA_MAX_HASH_BLOCK_SIZE && + !creq->last_req) { cached = true; if (!req->nbytes) @@ -648,7 +653,8 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) if (!mv_cesa_ahash_req_iter_next_op(&iter)) break; - op = mv_cesa_dma_add_frag(&basereq->chain, &creq->op_tmpl, + op = mv_cesa_dma_add_frag(&basereq->chain, + &creq->op_tmpl, frag_len, flags); if (IS_ERR(op)) { ret = PTR_ERR(op); @@ -920,7 +926,7 @@ struct ahash_alg mv_md5_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx), .cra_init = mv_cesa_ahash_cra_init, .cra_module = THIS_MODULE, - } + } } }; @@ -990,7 +996,7 @@ struct ahash_alg mv_sha1_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx), .cra_init = mv_cesa_ahash_cra_init, .cra_module = THIS_MODULE, - } + } } }; @@ -1063,7 +1069,7 @@ struct ahash_alg mv_sha256_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx), .cra_init = mv_cesa_ahash_cra_init, .cra_module = THIS_MODULE, - } + } } }; @@ -1297,7 +1303,7 @@ struct ahash_alg mv_ahmac_md5_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx), .cra_init = mv_cesa_ahmac_cra_init, .cra_module = THIS_MODULE, - } + } } }; @@ -1367,7 +1373,7 @@ struct ahash_alg mv_ahmac_sha1_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx), .cra_init = mv_cesa_ahmac_cra_init, .cra_module = THIS_MODULE, - } + } } }; @@ -1437,6 +1443,6 @@ struct ahash_alg mv_ahmac_sha256_alg = { .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx), .cra_init = mv_cesa_ahmac_cra_init, .cra_module = THIS_MODULE, - } + } } }; diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/cesa/tdma.c similarity index 97% rename from drivers/crypto/marvell/tdma.c rename to drivers/crypto/marvell/cesa/tdma.c index 45939d53e8d6..b81ee276fe0e 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/cesa/tdma.c @@ -50,8 +50,8 @@ void mv_cesa_dma_step(struct mv_cesa_req *dreq) engine->regs + CESA_SA_CFG); writel_relaxed(dreq->chain.first->cur_dma, engine->regs + CESA_TDMA_NEXT_ADDR); - 
BUG_ON(readl(engine->regs + CESA_SA_CMD) & - CESA_SA_CMD_EN_CESA_SA_ACCL0); + WARN_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } @@ -175,8 +175,10 @@ int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status) break; } - /* Save the last request in error to engine->req, so that the core - * knows which request was fautly */ + /* + * Save the last request in error to engine->req, so that the core + * knows which request was fautly + */ if (res) { spin_lock_bh(&engine->lock); engine->req = req; From d9110b0b01ff1cd02751cd5c2c94e938a8906083 Mon Sep 17 00:00:00 2001 From: SrujanaChalla Date: Fri, 13 Mar 2020 17:17:06 +0530 Subject: [PATCH 147/162] crypto: marvell - add support for OCTEON TX CPT engine Add support for the cryptographic acceleration unit (CPT) on OcteonTX CN83XX SoC. Co-developed-by: Lukasz Bartosik Signed-off-by: Lukasz Bartosik Signed-off-by: SrujanaChalla Signed-off-by: Herbert Xu --- MAINTAINERS | 1 + drivers/crypto/marvell/octeontx/Makefile | 4 + .../crypto/marvell/octeontx/otx_cpt_common.h | 51 + .../marvell/octeontx/otx_cpt_hw_types.h | 249 +++ drivers/crypto/marvell/octeontx/otx_cptpf.h | 34 + .../crypto/marvell/octeontx/otx_cptpf_main.c | 307 +++ .../crypto/marvell/octeontx/otx_cptpf_mbox.c | 253 +++ .../crypto/marvell/octeontx/otx_cptpf_ucode.c | 1686 +++++++++++++++++ .../crypto/marvell/octeontx/otx_cptpf_ucode.h | 180 ++ 9 files changed, 2765 insertions(+) create mode 100644 drivers/crypto/marvell/octeontx/Makefile create mode 100644 drivers/crypto/marvell/octeontx/otx_cpt_common.h create mode 100644 drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h create mode 100644 drivers/crypto/marvell/octeontx/otx_cptpf.h create mode 100644 drivers/crypto/marvell/octeontx/otx_cptpf_main.c create mode 100644 drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c create mode 100644 drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c create mode 100644 drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h diff --git a/MAINTAINERS b/MAINTAINERS index ce3597e7e6d0..c312b65746c2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10018,6 +10018,7 @@ F: Documentation/devicetree/bindings/phy/phy-mvebu-utmi.txt MARVELL CRYPTO DRIVER M: Boris Brezillon M: Arnaud Ebalard +M: Srujana Challa F: drivers/crypto/marvell/ S: Maintained L: linux-crypto@vger.kernel.org diff --git a/drivers/crypto/marvell/octeontx/Makefile b/drivers/crypto/marvell/octeontx/Makefile new file mode 100644 index 000000000000..627d00eb9b3b --- /dev/null +++ b/drivers/crypto/marvell/octeontx/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_CRYPTO_DEV_OCTEONTX_CPT) += octeontx-cpt.o + +octeontx-cpt-objs := otx_cptpf_main.o otx_cptpf_mbox.o otx_cptpf_ucode.o diff --git a/drivers/crypto/marvell/octeontx/otx_cpt_common.h b/drivers/crypto/marvell/octeontx/otx_cpt_common.h new file mode 100644 index 000000000000..ca704a7a265f --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cpt_common.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __OTX_CPT_COMMON_H +#define __OTX_CPT_COMMON_H + +#include +#include +#include + +#define OTX_CPT_MAX_MBOX_DATA_STR_SIZE 64 + +enum otx_cptpf_type { + OTX_CPT_AE = 2, + OTX_CPT_SE = 3, + BAD_OTX_CPTPF_TYPE, +}; + +enum otx_cptvf_type { + OTX_CPT_AE_TYPES = 1, + OTX_CPT_SE_TYPES = 2, + BAD_OTX_CPTVF_TYPE, +}; + +/* VF-PF message opcodes */ +enum otx_cpt_mbox_opcode { + OTX_CPT_MSG_VF_UP = 1, + OTX_CPT_MSG_VF_DOWN, + OTX_CPT_MSG_READY, + OTX_CPT_MSG_QLEN, + OTX_CPT_MSG_QBIND_GRP, + OTX_CPT_MSG_VQ_PRIORITY, + OTX_CPT_MSG_PF_TYPE, + OTX_CPT_MSG_ACK, + OTX_CPT_MSG_NACK +}; + +/* OcteonTX CPT mailbox structure */ +struct otx_cpt_mbox { + u64 msg; /* Message type MBOX[0] */ + u64 data;/* Data MBOX[1] */ +}; + +#endif /* __OTX_CPT_COMMON_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h new file mode 100644 index 000000000000..bec483fbfefd --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h @@ -0,0 +1,249 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPT_HW_TYPES_H +#define __OTX_CPT_HW_TYPES_H + +#include + +/* Device IDs */ +#define OTX_CPT_PCI_PF_DEVICE_ID 0xa040 + +#define OTX_CPT_PCI_PF_SUBSYS_ID 0xa340 + +/* Configuration and status registers are in BAR0 on OcteonTX platform */ +#define OTX_CPT_PF_PCI_CFG_BAR 0 +/* Mailbox interrupts offset */ +#define OTX_CPT_PF_MBOX_INT 3 +#define OTX_CPT_PF_INT_VEC_E_MBOXX(x, a) ((x) + (a)) +/* Number of MSIX supported in PF */ +#define OTX_CPT_PF_MSIX_VECTORS 4 +/* Maximum supported microcode groups */ +#define OTX_CPT_MAX_ENGINE_GROUPS 8 + +/* OcteonTX CPT PF registers */ +#define OTX_CPT_PF_CONSTANTS (0x0ll) +#define OTX_CPT_PF_RESET (0x100ll) +#define OTX_CPT_PF_DIAG (0x120ll) +#define OTX_CPT_PF_BIST_STATUS (0x160ll) +#define OTX_CPT_PF_ECC0_CTL (0x200ll) +#define OTX_CPT_PF_ECC0_FLIP (0x210ll) +#define OTX_CPT_PF_ECC0_INT (0x220ll) +#define OTX_CPT_PF_ECC0_INT_W1S (0x230ll) +#define OTX_CPT_PF_ECC0_ENA_W1S (0x240ll) +#define OTX_CPT_PF_ECC0_ENA_W1C (0x250ll) +#define OTX_CPT_PF_MBOX_INTX(b) (0x400ll | (u64)(b) << 3) +#define OTX_CPT_PF_MBOX_INT_W1SX(b) (0x420ll | (u64)(b) << 3) +#define OTX_CPT_PF_MBOX_ENA_W1CX(b) (0x440ll | (u64)(b) << 3) +#define OTX_CPT_PF_MBOX_ENA_W1SX(b) (0x460ll | (u64)(b) << 3) +#define OTX_CPT_PF_EXEC_INT (0x500ll) +#define OTX_CPT_PF_EXEC_INT_W1S (0x520ll) +#define OTX_CPT_PF_EXEC_ENA_W1C (0x540ll) +#define OTX_CPT_PF_EXEC_ENA_W1S (0x560ll) +#define OTX_CPT_PF_GX_EN(b) (0x600ll | (u64)(b) << 3) +#define OTX_CPT_PF_EXEC_INFO (0x700ll) +#define OTX_CPT_PF_EXEC_BUSY (0x800ll) +#define OTX_CPT_PF_EXEC_INFO0 (0x900ll) +#define OTX_CPT_PF_EXEC_INFO1 (0x910ll) +#define OTX_CPT_PF_INST_REQ_PC (0x10000ll) +#define OTX_CPT_PF_INST_LATENCY_PC (0x10020ll) +#define OTX_CPT_PF_RD_REQ_PC (0x10040ll) +#define OTX_CPT_PF_RD_LATENCY_PC (0x10060ll) +#define OTX_CPT_PF_RD_UC_PC (0x10080ll) +#define OTX_CPT_PF_ACTIVE_CYCLES_PC (0x10100ll) +#define OTX_CPT_PF_EXE_CTL (0x4000000ll) +#define OTX_CPT_PF_EXE_STATUS (0x4000008ll) +#define OTX_CPT_PF_EXE_CLK (0x4000010ll) +#define OTX_CPT_PF_EXE_DBG_CTL (0x4000018ll) +#define OTX_CPT_PF_EXE_DBG_DATA (0x4000020ll) +#define OTX_CPT_PF_EXE_BIST_STATUS (0x4000028ll) +#define OTX_CPT_PF_EXE_REQ_TIMER 
(0x4000030ll) +#define OTX_CPT_PF_EXE_MEM_CTL (0x4000038ll) +#define OTX_CPT_PF_EXE_PERF_CTL (0x4001000ll) +#define OTX_CPT_PF_EXE_DBG_CNTX(b) (0x4001100ll | (u64)(b) << 3) +#define OTX_CPT_PF_EXE_PERF_EVENT_CNT (0x4001180ll) +#define OTX_CPT_PF_EXE_EPCI_INBX_CNT(b) (0x4001200ll | (u64)(b) << 3) +#define OTX_CPT_PF_EXE_EPCI_OUTBX_CNT(b) (0x4001240ll | (u64)(b) << 3) +#define OTX_CPT_PF_ENGX_UCODE_BASE(b) (0x4002000ll | (u64)(b) << 3) +#define OTX_CPT_PF_QX_CTL(b) (0x8000000ll | (u64)(b) << 20) +#define OTX_CPT_PF_QX_GMCTL(b) (0x8000020ll | (u64)(b) << 20) +#define OTX_CPT_PF_QX_CTL2(b) (0x8000100ll | (u64)(b) << 20) +#define OTX_CPT_PF_VFX_MBOXX(b, c) (0x8001000ll | (u64)(b) << 20 | \ + (u64)(c) << 8) + +/* + * Register (NCB) otx_cpt#_pf_bist_status + * + * CPT PF Control Bist Status Register + * This register has the BIST status of memories. Each bit is the BIST result + * of an individual memory (per bit, 0 = pass and 1 = fail). + * otx_cptx_pf_bist_status_s + * Word0 + * bstatus [29:0](RO/H) BIST status. One bit per memory, enumerated by + * CPT_RAMS_E. + */ +union otx_cptx_pf_bist_status { + u64 u; + struct otx_cptx_pf_bist_status_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_30_63:34; + u64 bstatus:30; +#else /* Word 0 - Little Endian */ + u64 bstatus:30; + u64 reserved_30_63:34; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_pf_constants + * + * CPT PF Constants Register + * This register contains implementation-related parameters of CPT in CNXXXX. + * otx_cptx_pf_constants_s + * Word 0 + * reserved_40_63:24 [63:40] Reserved. + * epcis:8 [39:32](RO) Number of EPCI busses. + * grps:8 [31:24](RO) Number of engine groups implemented. + * ae:8 [23:16](RO/H) Number of AEs. In CNXXXX, for CPT0 returns 0x0, + * for CPT1 returns 0x18, or less if there are fuse-disables. + * se:8 [15:8](RO/H) Number of SEs. In CNXXXX, for CPT0 returns 0x30, + * or less if there are fuse-disables, for CPT1 returns 0x0. + * vq:8 [7:0](RO) Number of VQs. + */ +union otx_cptx_pf_constants { + u64 u; + struct otx_cptx_pf_constants_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_40_63:24; + u64 epcis:8; + u64 grps:8; + u64 ae:8; + u64 se:8; + u64 vq:8; +#else /* Word 0 - Little Endian */ + u64 vq:8; + u64 se:8; + u64 ae:8; + u64 grps:8; + u64 epcis:8; + u64 reserved_40_63:24; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_pf_exe_bist_status + * + * CPT PF Engine Bist Status Register + * This register has the BIST status of each engine. Each bit is the + * BIST result of an individual engine (per bit, 0 = pass and 1 = fail). + * otx_cptx_pf_exe_bist_status_s + * Word0 + * reserved_48_63:16 [63:48] reserved + * bstatus:48 [47:0](RO/H) BIST status. One bit per engine. + * + */ +union otx_cptx_pf_exe_bist_status { + u64 u; + struct otx_cptx_pf_exe_bist_status_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_48_63:16; + u64 bstatus:48; +#else /* Word 0 - Little Endian */ + u64 bstatus:48; + u64 reserved_48_63:16; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_pf_q#_ctl + * + * CPT Queue Control Register + * This register configures queues. This register should be changed only + * when quiescent (see CPT()_VQ()_INPROG[INFLIGHT]). + * otx_cptx_pf_qx_ctl_s + * Word0 + * reserved_60_63:4 [63:60] reserved. + * aura:12; [59:48](R/W) Guest-aura for returning this queue's + * instruction-chunk buffers to FPA. Only used when [INST_FREE] is set. 
+ * For the FPA to not discard the request, FPA_PF_MAP() must map + * [AURA] and CPT()_PF_Q()_GMCTL[GMID] as valid. + * reserved_45_47:3 [47:45] reserved. + * size:13 [44:32](R/W) Command-buffer size, in number of 64-bit words per + * command buffer segment. Must be 8*n + 1, where n is the number of + * instructions per buffer segment. + * reserved_11_31:21 [31:11] Reserved. + * cont_err:1 [10:10](R/W) Continue on error. + * 0 = When CPT()_VQ()_MISC_INT[NWRP], CPT()_VQ()_MISC_INT[IRDE] or + * CPT()_VQ()_MISC_INT[DOVF] are set by hardware or software via + * CPT()_VQ()_MISC_INT_W1S, then CPT()_VQ()_CTL[ENA] is cleared. Due to + * pipelining, additional instructions may have been processed between the + * instruction causing the error and the next instruction in the disabled + * queue (the instruction at CPT()_VQ()_SADDR). + * 1 = Ignore errors and continue processing instructions. + * For diagnostic use only. + * inst_free:1 [9:9](R/W) Instruction FPA free. When set, when CPT reaches the + * end of an instruction chunk, that chunk will be freed to the FPA. + * inst_be:1 [8:8](R/W) Instruction big-endian control. When set, instructions, + * instruction next chunk pointers, and result structures are stored in + * big-endian format in memory. + * iqb_ldwb:1 [7:7](R/W) Instruction load don't write back. + * 0 = The hardware issues NCB transient load (LDT) towards the cache, + * which if the line hits and is is dirty will cause the line to be + * written back before being replaced. + * 1 = The hardware issues NCB LDWB read-and-invalidate command towards + * the cache when fetching the last word of instructions; as a result the + * line will not be written back when replaced. This improves + * performance, but software must not read the instructions after they are + * posted to the hardware. Reads that do not consume the last word of a + * cache line always use LDI. + * reserved_4_6:3 [6:4] Reserved. + * grp:3; [3:1](R/W) Engine group. + * pri:1; [0:0](R/W) Queue priority. + * 1 = This queue has higher priority. Round-robin between higher + * priority queues. + * 0 = This queue has lower priority. Round-robin between lower + * priority queues. + */ +union otx_cptx_pf_qx_ctl { + u64 u; + struct otx_cptx_pf_qx_ctl_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_60_63:4; + u64 aura:12; + u64 reserved_45_47:3; + u64 size:13; + u64 reserved_11_31:21; + u64 cont_err:1; + u64 inst_free:1; + u64 inst_be:1; + u64 iqb_ldwb:1; + u64 reserved_4_6:3; + u64 grp:3; + u64 pri:1; +#else /* Word 0 - Little Endian */ + u64 pri:1; + u64 grp:3; + u64 reserved_4_6:3; + u64 iqb_ldwb:1; + u64 inst_be:1; + u64 inst_free:1; + u64 cont_err:1; + u64 reserved_11_31:21; + u64 size:13; + u64 reserved_45_47:3; + u64 aura:12; + u64 reserved_60_63:4; +#endif /* Word 0 - End */ + } s; +}; +#endif /* __OTX_CPT_HW_TYPES_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf.h b/drivers/crypto/marvell/octeontx/otx_cptpf.h new file mode 100644 index 000000000000..73cd0a9bc563 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __OTX_CPTPF_H +#define __OTX_CPTPF_H + +#include +#include +#include "otx_cptpf_ucode.h" + +/* + * OcteonTX CPT device structure + */ +struct otx_cpt_device { + void __iomem *reg_base; /* Register start address */ + struct pci_dev *pdev; /* Pci device handle */ + struct otx_cpt_eng_grps eng_grps;/* Engine groups information */ + struct list_head list; + u8 pf_type; /* PF type SE or AE */ + u8 max_vfs; /* Maximum number of VFs supported by the CPT */ + u8 vfs_enabled; /* Number of enabled VFs */ +}; + +void otx_cpt_mbox_intr_handler(struct otx_cpt_device *cpt, int mbx); +void otx_cpt_disable_all_cores(struct otx_cpt_device *cpt); + +#endif /* __OTX_CPTPF_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_main.c b/drivers/crypto/marvell/octeontx/otx_cptpf_main.c new file mode 100644 index 000000000000..200fb3303db0 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_main.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "otx_cpt_common.h" +#include "otx_cptpf.h" + +#define DRV_NAME "octeontx-cpt" +#define DRV_VERSION "1.0" + +static void otx_cpt_disable_mbox_interrupts(struct otx_cpt_device *cpt) +{ + /* Disable mbox(0) interrupts for all VFs */ + writeq(~0ull, cpt->reg_base + OTX_CPT_PF_MBOX_ENA_W1CX(0)); +} + +static void otx_cpt_enable_mbox_interrupts(struct otx_cpt_device *cpt) +{ + /* Enable mbox(0) interrupts for all VFs */ + writeq(~0ull, cpt->reg_base + OTX_CPT_PF_MBOX_ENA_W1SX(0)); +} + +static irqreturn_t otx_cpt_mbx0_intr_handler(int __always_unused irq, + void *cpt) +{ + otx_cpt_mbox_intr_handler(cpt, 0); + + return IRQ_HANDLED; +} + +static void otx_cpt_reset(struct otx_cpt_device *cpt) +{ + writeq(1, cpt->reg_base + OTX_CPT_PF_RESET); +} + +static void otx_cpt_find_max_enabled_cores(struct otx_cpt_device *cpt) +{ + union otx_cptx_pf_constants pf_cnsts = {0}; + + pf_cnsts.u = readq(cpt->reg_base + OTX_CPT_PF_CONSTANTS); + cpt->eng_grps.avail.max_se_cnt = pf_cnsts.s.se; + cpt->eng_grps.avail.max_ae_cnt = pf_cnsts.s.ae; +} + +static u32 otx_cpt_check_bist_status(struct otx_cpt_device *cpt) +{ + union otx_cptx_pf_bist_status bist_sts = {0}; + + bist_sts.u = readq(cpt->reg_base + OTX_CPT_PF_BIST_STATUS); + return bist_sts.u; +} + +static u64 otx_cpt_check_exe_bist_status(struct otx_cpt_device *cpt) +{ + union otx_cptx_pf_exe_bist_status bist_sts = {0}; + + bist_sts.u = readq(cpt->reg_base + OTX_CPT_PF_EXE_BIST_STATUS); + return bist_sts.u; +} + +static int otx_cpt_device_init(struct otx_cpt_device *cpt) +{ + struct device *dev = &cpt->pdev->dev; + u16 sdevid; + u64 bist; + + /* Reset the PF when probed first */ + otx_cpt_reset(cpt); + mdelay(100); + + pci_read_config_word(cpt->pdev, PCI_SUBSYSTEM_ID, &sdevid); + + /* Check BIST status */ + bist = (u64)otx_cpt_check_bist_status(cpt); + if (bist) { + dev_err(dev, "RAM BIST failed with code 0x%llx", bist); + return -ENODEV; + } + + bist = otx_cpt_check_exe_bist_status(cpt); + if (bist) { + dev_err(dev, "Engine BIST failed with code 0x%llx", bist); + return -ENODEV; + } + + /* Get max enabled cores */ + otx_cpt_find_max_enabled_cores(cpt); + + if ((sdevid == OTX_CPT_PCI_PF_SUBSYS_ID) && + (cpt->eng_grps.avail.max_se_cnt == 0)) { + cpt->pf_type = OTX_CPT_AE; + } else if ((sdevid == 
OTX_CPT_PCI_PF_SUBSYS_ID) && + (cpt->eng_grps.avail.max_ae_cnt == 0)) { + cpt->pf_type = OTX_CPT_SE; + } + + /* Get max VQs/VFs supported by the device */ + cpt->max_vfs = pci_sriov_get_totalvfs(cpt->pdev); + + /* Disable all cores */ + otx_cpt_disable_all_cores(cpt); + + return 0; +} + +static int otx_cpt_register_interrupts(struct otx_cpt_device *cpt) +{ + struct device *dev = &cpt->pdev->dev; + u32 mbox_int_idx = OTX_CPT_PF_MBOX_INT; + u32 num_vec = OTX_CPT_PF_MSIX_VECTORS; + int ret; + + /* Enable MSI-X */ + ret = pci_alloc_irq_vectors(cpt->pdev, num_vec, num_vec, PCI_IRQ_MSIX); + if (ret < 0) { + dev_err(&cpt->pdev->dev, + "Request for #%d msix vectors failed\n", + num_vec); + return ret; + } + + /* Register mailbox interrupt handlers */ + ret = request_irq(pci_irq_vector(cpt->pdev, + OTX_CPT_PF_INT_VEC_E_MBOXX(mbox_int_idx, 0)), + otx_cpt_mbx0_intr_handler, 0, "CPT Mbox0", cpt); + if (ret) { + dev_err(dev, "Request irq failed\n"); + pci_free_irq_vectors(cpt->pdev); + return ret; + } + /* Enable mailbox interrupt */ + otx_cpt_enable_mbox_interrupts(cpt); + return 0; +} + +static void otx_cpt_unregister_interrupts(struct otx_cpt_device *cpt) +{ + u32 mbox_int_idx = OTX_CPT_PF_MBOX_INT; + + otx_cpt_disable_mbox_interrupts(cpt); + free_irq(pci_irq_vector(cpt->pdev, + OTX_CPT_PF_INT_VEC_E_MBOXX(mbox_int_idx, 0)), + cpt); + pci_free_irq_vectors(cpt->pdev); +} + + +static int otx_cpt_sriov_configure(struct pci_dev *pdev, int numvfs) +{ + struct otx_cpt_device *cpt = pci_get_drvdata(pdev); + int ret = 0; + + if (numvfs > cpt->max_vfs) + numvfs = cpt->max_vfs; + + if (numvfs > 0) { + ret = otx_cpt_try_create_default_eng_grps(cpt->pdev, + &cpt->eng_grps, + cpt->pf_type); + if (ret) + return ret; + + cpt->vfs_enabled = numvfs; + ret = pci_enable_sriov(pdev, numvfs); + if (ret) { + cpt->vfs_enabled = 0; + return ret; + } + otx_cpt_set_eng_grps_is_rdonly(&cpt->eng_grps, true); + try_module_get(THIS_MODULE); + ret = numvfs; + } else { + pci_disable_sriov(pdev); + otx_cpt_set_eng_grps_is_rdonly(&cpt->eng_grps, false); + module_put(THIS_MODULE); + cpt->vfs_enabled = 0; + } + dev_notice(&cpt->pdev->dev, "VFs enabled: %d\n", ret); + + return ret; +} + +static int otx_cpt_probe(struct pci_dev *pdev, + const struct pci_device_id __always_unused *ent) +{ + struct device *dev = &pdev->dev; + struct otx_cpt_device *cpt; + int err; + + cpt = devm_kzalloc(dev, sizeof(*cpt), GFP_KERNEL); + if (!cpt) + return -ENOMEM; + + pci_set_drvdata(pdev, cpt); + cpt->pdev = pdev; + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + goto err_clear_drvdata; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto err_disable_device; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto err_release_regions; + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n"); + goto err_release_regions; + } + + /* MAP PF's configuration registers */ + cpt->reg_base = pci_iomap(pdev, OTX_CPT_PF_PCI_CFG_BAR, 0); + if (!cpt->reg_base) { + dev_err(dev, "Cannot map config register space, aborting\n"); + err = -ENOMEM; + goto err_release_regions; + } + + /* CPT device HW initialization */ + err = otx_cpt_device_init(cpt); + if (err) + goto err_unmap_region; + + /* Register interrupts */ + err = otx_cpt_register_interrupts(cpt); + if (err) + goto 
err_unmap_region; + + /* Initialize engine groups */ + err = otx_cpt_init_eng_grps(pdev, &cpt->eng_grps, cpt->pf_type); + if (err) + goto err_unregister_interrupts; + + return 0; + +err_unregister_interrupts: + otx_cpt_unregister_interrupts(cpt); +err_unmap_region: + pci_iounmap(pdev, cpt->reg_base); +err_release_regions: + pci_release_regions(pdev); +err_disable_device: + pci_disable_device(pdev); +err_clear_drvdata: + pci_set_drvdata(pdev, NULL); + + return err; +} + +static void otx_cpt_remove(struct pci_dev *pdev) +{ + struct otx_cpt_device *cpt = pci_get_drvdata(pdev); + + if (!cpt) + return; + + /* Disable VFs */ + pci_disable_sriov(pdev); + /* Cleanup engine groups */ + otx_cpt_cleanup_eng_grps(pdev, &cpt->eng_grps); + /* Disable CPT PF interrupts */ + otx_cpt_unregister_interrupts(cpt); + /* Disengage SE and AE cores from all groups */ + otx_cpt_disable_all_cores(cpt); + pci_iounmap(pdev, cpt->reg_base); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +/* Supported devices */ +static const struct pci_device_id otx_cpt_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OTX_CPT_PCI_PF_DEVICE_ID) }, + { 0, } /* end of table */ +}; + +static struct pci_driver otx_cpt_pci_driver = { + .name = DRV_NAME, + .id_table = otx_cpt_id_table, + .probe = otx_cpt_probe, + .remove = otx_cpt_remove, + .sriov_configure = otx_cpt_sriov_configure +}; + +module_pci_driver(otx_cpt_pci_driver); + +MODULE_AUTHOR("Marvell International Ltd."); +MODULE_DESCRIPTION("Marvell OcteonTX CPT Physical Function Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, otx_cpt_id_table); diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c b/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c new file mode 100644 index 000000000000..a6774232e9a3 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include "otx_cpt_common.h" +#include "otx_cptpf.h" + +static char *get_mbox_opcode_str(int msg_opcode) +{ + char *str = "Unknown"; + + switch (msg_opcode) { + case OTX_CPT_MSG_VF_UP: + str = "UP"; + break; + + case OTX_CPT_MSG_VF_DOWN: + str = "DOWN"; + break; + + case OTX_CPT_MSG_READY: + str = "READY"; + break; + + case OTX_CPT_MSG_QLEN: + str = "QLEN"; + break; + + case OTX_CPT_MSG_QBIND_GRP: + str = "QBIND_GRP"; + break; + + case OTX_CPT_MSG_VQ_PRIORITY: + str = "VQ_PRIORITY"; + break; + + case OTX_CPT_MSG_PF_TYPE: + str = "PF_TYPE"; + break; + + case OTX_CPT_MSG_ACK: + str = "ACK"; + break; + + case OTX_CPT_MSG_NACK: + str = "NACK"; + break; + } + + return str; +} + +static void dump_mbox_msg(struct otx_cpt_mbox *mbox_msg, int vf_id) +{ + char raw_data_str[OTX_CPT_MAX_MBOX_DATA_STR_SIZE]; + + hex_dump_to_buffer(mbox_msg, sizeof(struct otx_cpt_mbox), 16, 8, + raw_data_str, OTX_CPT_MAX_MBOX_DATA_STR_SIZE, false); + if (vf_id >= 0) + pr_debug("MBOX opcode %s received from VF%d raw_data %s", + get_mbox_opcode_str(mbox_msg->msg), vf_id, + raw_data_str); + else + pr_debug("MBOX opcode %s received from PF raw_data %s", + get_mbox_opcode_str(mbox_msg->msg), raw_data_str); +} + +static void otx_cpt_send_msg_to_vf(struct otx_cpt_device *cpt, int vf, + struct otx_cpt_mbox *mbx) +{ + /* Writing mbox(0) causes interrupt */ + writeq(mbx->data, cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 1)); + writeq(mbx->msg, cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 0)); +} + +/* + * ACKs VF's mailbox message + * @vf: VF to which ACK to be sent + */ +static void otx_cpt_mbox_send_ack(struct otx_cpt_device *cpt, int vf, + struct otx_cpt_mbox *mbx) +{ + mbx->data = 0ull; + mbx->msg = OTX_CPT_MSG_ACK; + otx_cpt_send_msg_to_vf(cpt, vf, mbx); +} + +/* NACKs VF's mailbox message that PF is not able to complete the action */ +static void otx_cptpf_mbox_send_nack(struct otx_cpt_device *cpt, int vf, + struct otx_cpt_mbox *mbx) +{ + mbx->data = 0ull; + mbx->msg = OTX_CPT_MSG_NACK; + otx_cpt_send_msg_to_vf(cpt, vf, mbx); +} + +static void otx_cpt_clear_mbox_intr(struct otx_cpt_device *cpt, u32 vf) +{ + /* W1C for the VF */ + writeq(1ull << vf, cpt->reg_base + OTX_CPT_PF_MBOX_INTX(0)); +} + +/* + * Configure QLEN/Chunk sizes for VF + */ +static void otx_cpt_cfg_qlen_for_vf(struct otx_cpt_device *cpt, int vf, + u32 size) +{ + union otx_cptx_pf_qx_ctl pf_qx_ctl; + + pf_qx_ctl.u = readq(cpt->reg_base + OTX_CPT_PF_QX_CTL(vf)); + pf_qx_ctl.s.size = size; + pf_qx_ctl.s.cont_err = true; + writeq(pf_qx_ctl.u, cpt->reg_base + OTX_CPT_PF_QX_CTL(vf)); +} + +/* + * Configure VQ priority + */ +static void otx_cpt_cfg_vq_priority(struct otx_cpt_device *cpt, int vf, u32 pri) +{ + union otx_cptx_pf_qx_ctl pf_qx_ctl; + + pf_qx_ctl.u = readq(cpt->reg_base + OTX_CPT_PF_QX_CTL(vf)); + pf_qx_ctl.s.pri = pri; + writeq(pf_qx_ctl.u, cpt->reg_base + OTX_CPT_PF_QX_CTL(vf)); +} + +static int otx_cpt_bind_vq_to_grp(struct otx_cpt_device *cpt, u8 q, u8 grp) +{ + struct device *dev = &cpt->pdev->dev; + struct otx_cpt_eng_grp_info *eng_grp; + union otx_cptx_pf_qx_ctl pf_qx_ctl; + struct otx_cpt_ucode *ucode; + + if (q >= cpt->max_vfs) { + dev_err(dev, "Requested queue %d is > than maximum avail %d", + q, cpt->max_vfs); + return -EINVAL; + } + + if (grp >= OTX_CPT_MAX_ENGINE_GROUPS) { + dev_err(dev, "Requested group %d is > than maximum avail %d", + grp, OTX_CPT_MAX_ENGINE_GROUPS); + return -EINVAL; + } + + eng_grp = &cpt->eng_grps.grp[grp]; + if (!eng_grp->is_enabled) { + dev_err(dev, "Requested engine group %d is disabled", grp); + return -EINVAL; + 
} + + pf_qx_ctl.u = readq(cpt->reg_base + OTX_CPT_PF_QX_CTL(q)); + pf_qx_ctl.s.grp = grp; + writeq(pf_qx_ctl.u, cpt->reg_base + OTX_CPT_PF_QX_CTL(q)); + + if (eng_grp->mirror.is_ena) + ucode = &eng_grp->g->grp[eng_grp->mirror.idx].ucode[0]; + else + ucode = &eng_grp->ucode[0]; + + if (otx_cpt_uc_supports_eng_type(ucode, OTX_CPT_SE_TYPES)) + return OTX_CPT_SE_TYPES; + else if (otx_cpt_uc_supports_eng_type(ucode, OTX_CPT_AE_TYPES)) + return OTX_CPT_AE_TYPES; + else + return BAD_OTX_CPTVF_TYPE; +} + +/* Interrupt handler to handle mailbox messages from VFs */ +static void otx_cpt_handle_mbox_intr(struct otx_cpt_device *cpt, int vf) +{ + int vftype = 0; + struct otx_cpt_mbox mbx = {}; + struct device *dev = &cpt->pdev->dev; + /* + * MBOX[0] contains msg + * MBOX[1] contains data + */ + mbx.msg = readq(cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 0)); + mbx.data = readq(cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 1)); + + dump_mbox_msg(&mbx, vf); + + switch (mbx.msg) { + case OTX_CPT_MSG_VF_UP: + mbx.msg = OTX_CPT_MSG_VF_UP; + mbx.data = cpt->vfs_enabled; + otx_cpt_send_msg_to_vf(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_READY: + mbx.msg = OTX_CPT_MSG_READY; + mbx.data = vf; + otx_cpt_send_msg_to_vf(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_VF_DOWN: + /* First msg in VF teardown sequence */ + otx_cpt_mbox_send_ack(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_QLEN: + otx_cpt_cfg_qlen_for_vf(cpt, vf, mbx.data); + otx_cpt_mbox_send_ack(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_QBIND_GRP: + vftype = otx_cpt_bind_vq_to_grp(cpt, vf, (u8)mbx.data); + if ((vftype != OTX_CPT_AE_TYPES) && + (vftype != OTX_CPT_SE_TYPES)) { + dev_err(dev, "VF%d binding to eng group %llu failed", + vf, mbx.data); + otx_cptpf_mbox_send_nack(cpt, vf, &mbx); + } else { + mbx.msg = OTX_CPT_MSG_QBIND_GRP; + mbx.data = vftype; + otx_cpt_send_msg_to_vf(cpt, vf, &mbx); + } + break; + case OTX_CPT_MSG_PF_TYPE: + mbx.msg = OTX_CPT_MSG_PF_TYPE; + mbx.data = cpt->pf_type; + otx_cpt_send_msg_to_vf(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_VQ_PRIORITY: + otx_cpt_cfg_vq_priority(cpt, vf, mbx.data); + otx_cpt_mbox_send_ack(cpt, vf, &mbx); + break; + default: + dev_err(&cpt->pdev->dev, "Invalid msg from VF%d, msg 0x%llx\n", + vf, mbx.msg); + break; + } +} + +void otx_cpt_mbox_intr_handler (struct otx_cpt_device *cpt, int mbx) +{ + u64 intr; + u8 vf; + + intr = readq(cpt->reg_base + OTX_CPT_PF_MBOX_INTX(0)); + pr_debug("PF interrupt mbox%d mask 0x%llx\n", mbx, intr); + for (vf = 0; vf < cpt->max_vfs; vf++) { + if (intr & (1ULL << vf)) { + otx_cpt_handle_mbox_intr(cpt, vf); + otx_cpt_clear_mbox_intr(cpt, vf); + } + } +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c new file mode 100644 index 000000000000..d04baa319592 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c @@ -0,0 +1,1686 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include "otx_cpt_common.h" +#include "otx_cptpf_ucode.h" +#include "otx_cptpf.h" + +#define CSR_DELAY 30 +/* Tar archive defines */ +#define TAR_MAGIC "ustar" +#define TAR_MAGIC_LEN 6 +#define TAR_BLOCK_LEN 512 +#define REGTYPE '0' +#define AREGTYPE '\0' + +/* tar header as defined in POSIX 1003.1-1990. 
*/ +struct tar_hdr_t { + char name[100]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char chksum[8]; + char typeflag; + char linkname[100]; + char magic[6]; + char version[2]; + char uname[32]; + char gname[32]; + char devmajor[8]; + char devminor[8]; + char prefix[155]; +}; + +struct tar_blk_t { + union { + struct tar_hdr_t hdr; + char block[TAR_BLOCK_LEN]; + }; +}; + +struct tar_arch_info_t { + struct list_head ucodes; + const struct firmware *fw; +}; + +static struct otx_cpt_bitmap get_cores_bmap(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp) +{ + struct otx_cpt_bitmap bmap = { {0} }; + bool found = false; + int i; + + if (eng_grp->g->engs_num > OTX_CPT_MAX_ENGINES) { + dev_err(dev, "unsupported number of engines %d on octeontx", + eng_grp->g->engs_num); + return bmap; + } + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (eng_grp->engs[i].type) { + bitmap_or(bmap.bits, bmap.bits, + eng_grp->engs[i].bmap, + eng_grp->g->engs_num); + bmap.size = eng_grp->g->engs_num; + found = true; + } + } + + if (!found) + dev_err(dev, "No engines reserved for engine group %d", + eng_grp->idx); + return bmap; +} + +static int is_eng_type(int val, int eng_type) +{ + return val & (1 << eng_type); +} + +static int dev_supports_eng_type(struct otx_cpt_eng_grps *eng_grps, + int eng_type) +{ + return is_eng_type(eng_grps->eng_types_supported, eng_type); +} + +static void set_ucode_filename(struct otx_cpt_ucode *ucode, + const char *filename) +{ + strlcpy(ucode->filename, filename, OTX_CPT_UCODE_NAME_LENGTH); +} + +static char *get_eng_type_str(int eng_type) +{ + char *str = "unknown"; + + switch (eng_type) { + case OTX_CPT_SE_TYPES: + str = "SE"; + break; + + case OTX_CPT_AE_TYPES: + str = "AE"; + break; + } + return str; +} + +static char *get_ucode_type_str(int ucode_type) +{ + char *str = "unknown"; + + switch (ucode_type) { + case (1 << OTX_CPT_SE_TYPES): + str = "SE"; + break; + + case (1 << OTX_CPT_AE_TYPES): + str = "AE"; + break; + } + return str; +} + +static int get_ucode_type(struct otx_cpt_ucode_hdr *ucode_hdr, int *ucode_type) +{ + char tmp_ver_str[OTX_CPT_UCODE_VER_STR_SZ]; + u32 i, val = 0; + u8 nn; + + strlcpy(tmp_ver_str, ucode_hdr->ver_str, OTX_CPT_UCODE_VER_STR_SZ); + for (i = 0; i < strlen(tmp_ver_str); i++) + tmp_ver_str[i] = tolower(tmp_ver_str[i]); + + nn = ucode_hdr->ver_num.nn; + if (strnstr(tmp_ver_str, "se-", OTX_CPT_UCODE_VER_STR_SZ) && + (nn == OTX_CPT_SE_UC_TYPE1 || nn == OTX_CPT_SE_UC_TYPE2 || + nn == OTX_CPT_SE_UC_TYPE3)) + val |= 1 << OTX_CPT_SE_TYPES; + if (strnstr(tmp_ver_str, "ae", OTX_CPT_UCODE_VER_STR_SZ) && + nn == OTX_CPT_AE_UC_TYPE) + val |= 1 << OTX_CPT_AE_TYPES; + + *ucode_type = val; + + if (!val) + return -EINVAL; + if (is_eng_type(val, OTX_CPT_AE_TYPES) && + is_eng_type(val, OTX_CPT_SE_TYPES)) + return -EINVAL; + return 0; +} + +static int is_mem_zero(const char *ptr, int size) +{ + int i; + + for (i = 0; i < size; i++) { + if (ptr[i]) + return 0; + } + return 1; +} + +static int cpt_set_ucode_base(struct otx_cpt_eng_grp_info *eng_grp, void *obj) +{ + struct otx_cpt_device *cpt = (struct otx_cpt_device *) obj; + dma_addr_t dma_addr; + struct otx_cpt_bitmap bmap; + int i; + + bmap = get_cores_bmap(&cpt->pdev->dev, eng_grp); + if (!bmap.size) + return -EINVAL; + + if (eng_grp->mirror.is_ena) + dma_addr = + eng_grp->g->grp[eng_grp->mirror.idx].ucode[0].align_dma; + else + dma_addr = eng_grp->ucode[0].align_dma; + + /* + * Set UCODE_BASE only for the cores which are not used, + * other cores should have 
already valid UCODE_BASE set + */ + for_each_set_bit(i, bmap.bits, bmap.size) + if (!eng_grp->g->eng_ref_cnt[i]) + writeq((u64) dma_addr, cpt->reg_base + + OTX_CPT_PF_ENGX_UCODE_BASE(i)); + return 0; +} + +static int cpt_detach_and_disable_cores(struct otx_cpt_eng_grp_info *eng_grp, + void *obj) +{ + struct otx_cpt_device *cpt = (struct otx_cpt_device *) obj; + struct otx_cpt_bitmap bmap = { {0} }; + int timeout = 10; + int i, busy; + u64 reg; + + bmap = get_cores_bmap(&cpt->pdev->dev, eng_grp); + if (!bmap.size) + return -EINVAL; + + /* Detach the cores from group */ + reg = readq(cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx)); + for_each_set_bit(i, bmap.bits, bmap.size) { + if (reg & (1ull << i)) { + eng_grp->g->eng_ref_cnt[i]--; + reg &= ~(1ull << i); + } + } + writeq(reg, cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx)); + + /* Wait for cores to become idle */ + do { + busy = 0; + usleep_range(10000, 20000); + if (timeout-- < 0) + return -EBUSY; + + reg = readq(cpt->reg_base + OTX_CPT_PF_EXEC_BUSY); + for_each_set_bit(i, bmap.bits, bmap.size) + if (reg & (1ull << i)) { + busy = 1; + break; + } + } while (busy); + + /* Disable the cores only if they are not used anymore */ + reg = readq(cpt->reg_base + OTX_CPT_PF_EXE_CTL); + for_each_set_bit(i, bmap.bits, bmap.size) + if (!eng_grp->g->eng_ref_cnt[i]) + reg &= ~(1ull << i); + writeq(reg, cpt->reg_base + OTX_CPT_PF_EXE_CTL); + + return 0; +} + +static int cpt_attach_and_enable_cores(struct otx_cpt_eng_grp_info *eng_grp, + void *obj) +{ + struct otx_cpt_device *cpt = (struct otx_cpt_device *) obj; + struct otx_cpt_bitmap bmap; + u64 reg; + int i; + + bmap = get_cores_bmap(&cpt->pdev->dev, eng_grp); + if (!bmap.size) + return -EINVAL; + + /* Attach the cores to the group */ + reg = readq(cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx)); + for_each_set_bit(i, bmap.bits, bmap.size) { + if (!(reg & (1ull << i))) { + eng_grp->g->eng_ref_cnt[i]++; + reg |= 1ull << i; + } + } + writeq(reg, cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx)); + + /* Enable the cores */ + reg = readq(cpt->reg_base + OTX_CPT_PF_EXE_CTL); + for_each_set_bit(i, bmap.bits, bmap.size) + reg |= 1ull << i; + writeq(reg, cpt->reg_base + OTX_CPT_PF_EXE_CTL); + + return 0; +} + +static int process_tar_file(struct device *dev, + struct tar_arch_info_t *tar_arch, char *filename, + const u8 *data, u32 size) +{ + struct tar_ucode_info_t *tar_info; + struct otx_cpt_ucode_hdr *ucode_hdr; + int ucode_type, ucode_size; + + /* + * If size is less than microcode header size then don't report + * an error because it might not be microcode file, just process + * next file from archive + */ + if (size < sizeof(struct otx_cpt_ucode_hdr)) + return 0; + + ucode_hdr = (struct otx_cpt_ucode_hdr *) data; + /* + * If microcode version can't be found don't report an error + * because it might not be microcode file, just process next file + */ + if (get_ucode_type(ucode_hdr, &ucode_type)) + return 0; + + ucode_size = ntohl(ucode_hdr->code_length) * 2; + if (!ucode_size || (size < round_up(ucode_size, 16) + + sizeof(struct otx_cpt_ucode_hdr) + OTX_CPT_UCODE_SIGN_LEN)) { + dev_err(dev, "Ucode %s invalid size", filename); + return -EINVAL; + } + + tar_info = kzalloc(sizeof(struct tar_ucode_info_t), GFP_KERNEL); + if (!tar_info) + return -ENOMEM; + + tar_info->ucode_ptr = data; + set_ucode_filename(&tar_info->ucode, filename); + memcpy(tar_info->ucode.ver_str, ucode_hdr->ver_str, + OTX_CPT_UCODE_VER_STR_SZ); + tar_info->ucode.ver_num = ucode_hdr->ver_num; + tar_info->ucode.type = ucode_type; + 
tar_info->ucode.size = ucode_size; + list_add_tail(&tar_info->list, &tar_arch->ucodes); + + return 0; +} + +static void release_tar_archive(struct tar_arch_info_t *tar_arch) +{ + struct tar_ucode_info_t *curr, *temp; + + if (!tar_arch) + return; + + list_for_each_entry_safe(curr, temp, &tar_arch->ucodes, list) { + list_del(&curr->list); + kfree(curr); + } + + if (tar_arch->fw) + release_firmware(tar_arch->fw); + kfree(tar_arch); +} + +static struct tar_ucode_info_t *get_uc_from_tar_archive( + struct tar_arch_info_t *tar_arch, + int ucode_type) +{ + struct tar_ucode_info_t *curr, *uc_found = NULL; + + list_for_each_entry(curr, &tar_arch->ucodes, list) { + if (!is_eng_type(curr->ucode.type, ucode_type)) + continue; + + if (!uc_found) { + uc_found = curr; + continue; + } + + switch (ucode_type) { + case OTX_CPT_AE_TYPES: + break; + + case OTX_CPT_SE_TYPES: + if (uc_found->ucode.ver_num.nn == OTX_CPT_SE_UC_TYPE2 || + (uc_found->ucode.ver_num.nn == OTX_CPT_SE_UC_TYPE3 + && curr->ucode.ver_num.nn == OTX_CPT_SE_UC_TYPE1)) + uc_found = curr; + break; + } + } + + return uc_found; +} + +static void print_tar_dbg_info(struct tar_arch_info_t *tar_arch, + char *tar_filename) +{ + struct tar_ucode_info_t *curr; + + pr_debug("Tar archive filename %s", tar_filename); + pr_debug("Tar archive pointer %p, size %ld", tar_arch->fw->data, + tar_arch->fw->size); + list_for_each_entry(curr, &tar_arch->ucodes, list) { + pr_debug("Ucode filename %s", curr->ucode.filename); + pr_debug("Ucode version string %s", curr->ucode.ver_str); + pr_debug("Ucode version %d.%d.%d.%d", + curr->ucode.ver_num.nn, curr->ucode.ver_num.xx, + curr->ucode.ver_num.yy, curr->ucode.ver_num.zz); + pr_debug("Ucode type (%d) %s", curr->ucode.type, + get_ucode_type_str(curr->ucode.type)); + pr_debug("Ucode size %d", curr->ucode.size); + pr_debug("Ucode ptr %p\n", curr->ucode_ptr); + } +} + +static struct tar_arch_info_t *load_tar_archive(struct device *dev, + char *tar_filename) +{ + struct tar_arch_info_t *tar_arch = NULL; + struct tar_blk_t *tar_blk; + unsigned int cur_size; + size_t tar_offs = 0; + size_t tar_size; + int ret; + + tar_arch = kzalloc(sizeof(struct tar_arch_info_t), GFP_KERNEL); + if (!tar_arch) + return NULL; + + INIT_LIST_HEAD(&tar_arch->ucodes); + + /* Load tar archive */ + ret = request_firmware(&tar_arch->fw, tar_filename, dev); + if (ret) + goto release_tar_arch; + + if (tar_arch->fw->size < TAR_BLOCK_LEN) { + dev_err(dev, "Invalid tar archive %s ", tar_filename); + goto release_tar_arch; + } + + tar_size = tar_arch->fw->size; + tar_blk = (struct tar_blk_t *) tar_arch->fw->data; + if (strncmp(tar_blk->hdr.magic, TAR_MAGIC, TAR_MAGIC_LEN - 1)) { + dev_err(dev, "Unsupported format of tar archive %s", + tar_filename); + goto release_tar_arch; + } + + while (1) { + /* Read current file size */ + ret = kstrtouint(tar_blk->hdr.size, 8, &cur_size); + if (ret) + goto release_tar_arch; + + if (tar_offs + cur_size > tar_size || + tar_offs + 2*TAR_BLOCK_LEN > tar_size) { + dev_err(dev, "Invalid tar archive %s ", tar_filename); + goto release_tar_arch; + } + + tar_offs += TAR_BLOCK_LEN; + if (tar_blk->hdr.typeflag == REGTYPE || + tar_blk->hdr.typeflag == AREGTYPE) { + ret = process_tar_file(dev, tar_arch, + tar_blk->hdr.name, + &tar_arch->fw->data[tar_offs], + cur_size); + if (ret) + goto release_tar_arch; + } + + tar_offs += (cur_size/TAR_BLOCK_LEN) * TAR_BLOCK_LEN; + if (cur_size % TAR_BLOCK_LEN) + tar_offs += TAR_BLOCK_LEN; + + /* Check for the end of the archive */ + if (tar_offs + 2*TAR_BLOCK_LEN > tar_size) { + dev_err(dev, 
"Invalid tar archive %s ", tar_filename); + goto release_tar_arch; + } + + if (is_mem_zero(&tar_arch->fw->data[tar_offs], + 2*TAR_BLOCK_LEN)) + break; + + /* Read next block from tar archive */ + tar_blk = (struct tar_blk_t *) &tar_arch->fw->data[tar_offs]; + } + + print_tar_dbg_info(tar_arch, tar_filename); + return tar_arch; +release_tar_arch: + release_tar_archive(tar_arch); + return NULL; +} + +static struct otx_cpt_engs_rsvd *find_engines_by_type( + struct otx_cpt_eng_grp_info *eng_grp, + int eng_type) +{ + int i; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!eng_grp->engs[i].type) + continue; + + if (eng_grp->engs[i].type == eng_type) + return &eng_grp->engs[i]; + } + return NULL; +} + +int otx_cpt_uc_supports_eng_type(struct otx_cpt_ucode *ucode, int eng_type) +{ + return is_eng_type(ucode->type, eng_type); +} +EXPORT_SYMBOL_GPL(otx_cpt_uc_supports_eng_type); + +int otx_cpt_eng_grp_has_eng_type(struct otx_cpt_eng_grp_info *eng_grp, + int eng_type) +{ + struct otx_cpt_engs_rsvd *engs; + + engs = find_engines_by_type(eng_grp, eng_type); + + return (engs != NULL ? 1 : 0); +} +EXPORT_SYMBOL_GPL(otx_cpt_eng_grp_has_eng_type); + +static void print_ucode_info(struct otx_cpt_eng_grp_info *eng_grp, + char *buf, int size) +{ + if (eng_grp->mirror.is_ena) { + scnprintf(buf, size, "%s (shared with engine_group%d)", + eng_grp->g->grp[eng_grp->mirror.idx].ucode[0].ver_str, + eng_grp->mirror.idx); + } else { + scnprintf(buf, size, "%s", eng_grp->ucode[0].ver_str); + } +} + +static void print_engs_info(struct otx_cpt_eng_grp_info *eng_grp, + char *buf, int size, int idx) +{ + struct otx_cpt_engs_rsvd *mirrored_engs = NULL; + struct otx_cpt_engs_rsvd *engs; + int len, i; + + buf[0] = '\0'; + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + if (idx != -1 && idx != i) + continue; + + if (eng_grp->mirror.is_ena) + mirrored_engs = find_engines_by_type( + &eng_grp->g->grp[eng_grp->mirror.idx], + engs->type); + if (i > 0 && idx == -1) { + len = strlen(buf); + scnprintf(buf+len, size-len, ", "); + } + + len = strlen(buf); + scnprintf(buf+len, size-len, "%d %s ", mirrored_engs ? + engs->count + mirrored_engs->count : engs->count, + get_eng_type_str(engs->type)); + if (mirrored_engs) { + len = strlen(buf); + scnprintf(buf+len, size-len, + "(%d shared with engine_group%d) ", + engs->count <= 0 ? 
engs->count + + mirrored_engs->count : mirrored_engs->count, + eng_grp->mirror.idx); + } + } +} + +static void print_ucode_dbg_info(struct otx_cpt_ucode *ucode) +{ + pr_debug("Ucode info"); + pr_debug("Ucode version string %s", ucode->ver_str); + pr_debug("Ucode version %d.%d.%d.%d", ucode->ver_num.nn, + ucode->ver_num.xx, ucode->ver_num.yy, ucode->ver_num.zz); + pr_debug("Ucode type %s", get_ucode_type_str(ucode->type)); + pr_debug("Ucode size %d", ucode->size); + pr_debug("Ucode virt address %16.16llx", (u64)ucode->align_va); + pr_debug("Ucode phys address %16.16llx\n", ucode->align_dma); +} + +static void cpt_print_engines_mask(struct otx_cpt_eng_grp_info *eng_grp, + struct device *dev, char *buf, int size) +{ + struct otx_cpt_bitmap bmap; + u32 mask[2]; + + bmap = get_cores_bmap(dev, eng_grp); + if (!bmap.size) { + scnprintf(buf, size, "unknown"); + return; + } + bitmap_to_arr32(mask, bmap.bits, bmap.size); + scnprintf(buf, size, "%8.8x %8.8x", mask[1], mask[0]); +} + + +static void print_dbg_info(struct device *dev, + struct otx_cpt_eng_grps *eng_grps) +{ + char engs_info[2*OTX_CPT_UCODE_NAME_LENGTH]; + struct otx_cpt_eng_grp_info *mirrored_grp; + char engs_mask[OTX_CPT_UCODE_NAME_LENGTH]; + struct otx_cpt_eng_grp_info *grp; + struct otx_cpt_engs_rsvd *engs; + u32 mask[4]; + int i, j; + + pr_debug("Engine groups global info"); + pr_debug("max SE %d, max AE %d", + eng_grps->avail.max_se_cnt, eng_grps->avail.max_ae_cnt); + pr_debug("free SE %d", eng_grps->avail.se_cnt); + pr_debug("free AE %d", eng_grps->avail.ae_cnt); + + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + grp = &eng_grps->grp[i]; + pr_debug("engine_group%d, state %s", i, grp->is_enabled ? + "enabled" : "disabled"); + if (grp->is_enabled) { + mirrored_grp = &eng_grps->grp[grp->mirror.idx]; + pr_debug("Ucode0 filename %s, version %s", + grp->mirror.is_ena ? + mirrored_grp->ucode[0].filename : + grp->ucode[0].filename, + grp->mirror.is_ena ? 
+ mirrored_grp->ucode[0].ver_str : + grp->ucode[0].ver_str); + } + + for (j = 0; j < OTX_CPT_MAX_ETYPES_PER_GRP; j++) { + engs = &grp->engs[j]; + if (engs->type) { + print_engs_info(grp, engs_info, + 2*OTX_CPT_UCODE_NAME_LENGTH, j); + pr_debug("Slot%d: %s", j, engs_info); + bitmap_to_arr32(mask, engs->bmap, + eng_grps->engs_num); + pr_debug("Mask: %8.8x %8.8x %8.8x %8.8x", + mask[3], mask[2], mask[1], mask[0]); + } else + pr_debug("Slot%d not used", j); + } + if (grp->is_enabled) { + cpt_print_engines_mask(grp, dev, engs_mask, + OTX_CPT_UCODE_NAME_LENGTH); + pr_debug("Cmask: %s", engs_mask); + } + } +} + +static int update_engines_avail_count(struct device *dev, + struct otx_cpt_engs_available *avail, + struct otx_cpt_engs_rsvd *engs, int val) +{ + switch (engs->type) { + case OTX_CPT_SE_TYPES: + avail->se_cnt += val; + break; + + case OTX_CPT_AE_TYPES: + avail->ae_cnt += val; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", engs->type); + return -EINVAL; + } + + return 0; +} + +static int update_engines_offset(struct device *dev, + struct otx_cpt_engs_available *avail, + struct otx_cpt_engs_rsvd *engs) +{ + switch (engs->type) { + case OTX_CPT_SE_TYPES: + engs->offset = 0; + break; + + case OTX_CPT_AE_TYPES: + engs->offset = avail->max_se_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", engs->type); + return -EINVAL; + } + + return 0; +} + +static int release_engines(struct device *dev, struct otx_cpt_eng_grp_info *grp) +{ + int i, ret = 0; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!grp->engs[i].type) + continue; + + if (grp->engs[i].count > 0) { + ret = update_engines_avail_count(dev, &grp->g->avail, + &grp->engs[i], + grp->engs[i].count); + if (ret) + return ret; + } + + grp->engs[i].type = 0; + grp->engs[i].count = 0; + grp->engs[i].offset = 0; + grp->engs[i].ucode = NULL; + bitmap_zero(grp->engs[i].bmap, grp->g->engs_num); + } + + return 0; +} + +static int do_reserve_engines(struct device *dev, + struct otx_cpt_eng_grp_info *grp, + struct otx_cpt_engines *req_engs) +{ + struct otx_cpt_engs_rsvd *engs = NULL; + int i, ret; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!grp->engs[i].type) { + engs = &grp->engs[i]; + break; + } + } + + if (!engs) + return -ENOMEM; + + engs->type = req_engs->type; + engs->count = req_engs->count; + + ret = update_engines_offset(dev, &grp->g->avail, engs); + if (ret) + return ret; + + if (engs->count > 0) { + ret = update_engines_avail_count(dev, &grp->g->avail, engs, + -engs->count); + if (ret) + return ret; + } + + return 0; +} + +static int check_engines_availability(struct device *dev, + struct otx_cpt_eng_grp_info *grp, + struct otx_cpt_engines *req_eng) +{ + int avail_cnt = 0; + + switch (req_eng->type) { + case OTX_CPT_SE_TYPES: + avail_cnt = grp->g->avail.se_cnt; + break; + + case OTX_CPT_AE_TYPES: + avail_cnt = grp->g->avail.ae_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", req_eng->type); + return -EINVAL; + } + + if (avail_cnt < req_eng->count) { + dev_err(dev, + "Error available %s engines %d < than requested %d", + get_eng_type_str(req_eng->type), + avail_cnt, req_eng->count); + return -EBUSY; + } + + return 0; +} + +static int reserve_engines(struct device *dev, struct otx_cpt_eng_grp_info *grp, + struct otx_cpt_engines *req_engs, int req_cnt) +{ + int i, ret; + + /* Validate if a number of requested engines is available */ + for (i = 0; i < req_cnt; i++) { + ret = check_engines_availability(dev, grp, &req_engs[i]); + if (ret) + return ret; + } + + 
/* Reserve requested engines for this engine group */ + for (i = 0; i < req_cnt; i++) { + ret = do_reserve_engines(dev, grp, &req_engs[i]); + if (ret) + return ret; + } + return 0; +} + +static ssize_t eng_grp_info_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + char ucode_info[2*OTX_CPT_UCODE_NAME_LENGTH]; + char engs_info[2*OTX_CPT_UCODE_NAME_LENGTH]; + char engs_mask[OTX_CPT_UCODE_NAME_LENGTH]; + struct otx_cpt_eng_grp_info *eng_grp; + int ret; + + eng_grp = container_of(attr, struct otx_cpt_eng_grp_info, info_attr); + mutex_lock(&eng_grp->g->lock); + + print_engs_info(eng_grp, engs_info, 2*OTX_CPT_UCODE_NAME_LENGTH, -1); + print_ucode_info(eng_grp, ucode_info, 2*OTX_CPT_UCODE_NAME_LENGTH); + cpt_print_engines_mask(eng_grp, dev, engs_mask, + OTX_CPT_UCODE_NAME_LENGTH); + ret = scnprintf(buf, PAGE_SIZE, + "Microcode : %s\nEngines: %s\nEngines mask: %s\n", + ucode_info, engs_info, engs_mask); + + mutex_unlock(&eng_grp->g->lock); + return ret; +} + +static int create_sysfs_eng_grps_info(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp) +{ + int ret; + + eng_grp->info_attr.show = eng_grp_info_show; + eng_grp->info_attr.store = NULL; + eng_grp->info_attr.attr.name = eng_grp->sysfs_info_name; + eng_grp->info_attr.attr.mode = 0440; + sysfs_attr_init(&eng_grp->info_attr.attr); + ret = device_create_file(dev, &eng_grp->info_attr); + if (ret) + return ret; + + return 0; +} + +static void ucode_unload(struct device *dev, struct otx_cpt_ucode *ucode) +{ + if (ucode->va) { + dma_free_coherent(dev, ucode->size + OTX_CPT_UCODE_ALIGNMENT, + ucode->va, ucode->dma); + ucode->va = NULL; + ucode->align_va = NULL; + ucode->dma = 0; + ucode->align_dma = 0; + ucode->size = 0; + } + + memset(&ucode->ver_str, 0, OTX_CPT_UCODE_VER_STR_SZ); + memset(&ucode->ver_num, 0, sizeof(struct otx_cpt_ucode_ver_num)); + set_ucode_filename(ucode, ""); + ucode->type = 0; +} + +static int copy_ucode_to_dma_mem(struct device *dev, + struct otx_cpt_ucode *ucode, + const u8 *ucode_data) +{ + u32 i; + + /* Allocate DMAable space */ + ucode->va = dma_alloc_coherent(dev, ucode->size + + OTX_CPT_UCODE_ALIGNMENT, + &ucode->dma, GFP_KERNEL); + if (!ucode->va) { + dev_err(dev, "Unable to allocate space for microcode"); + return -ENOMEM; + } + ucode->align_va = PTR_ALIGN(ucode->va, OTX_CPT_UCODE_ALIGNMENT); + ucode->align_dma = PTR_ALIGN(ucode->dma, OTX_CPT_UCODE_ALIGNMENT); + + memcpy((void *) ucode->align_va, (void *) ucode_data + + sizeof(struct otx_cpt_ucode_hdr), ucode->size); + + /* Byte swap 64-bit */ + for (i = 0; i < (ucode->size / 8); i++) + ((u64 *)ucode->align_va)[i] = + cpu_to_be64(((u64 *)ucode->align_va)[i]); + /* Ucode needs 16-bit swap */ + for (i = 0; i < (ucode->size / 2); i++) + ((u16 *)ucode->align_va)[i] = + cpu_to_be16(((u16 *)ucode->align_va)[i]); + return 0; +} + +static int ucode_load(struct device *dev, struct otx_cpt_ucode *ucode, + const char *ucode_filename) +{ + struct otx_cpt_ucode_hdr *ucode_hdr; + const struct firmware *fw; + int ret; + + set_ucode_filename(ucode, ucode_filename); + ret = request_firmware(&fw, ucode->filename, dev); + if (ret) + return ret; + + ucode_hdr = (struct otx_cpt_ucode_hdr *) fw->data; + memcpy(ucode->ver_str, ucode_hdr->ver_str, OTX_CPT_UCODE_VER_STR_SZ); + ucode->ver_num = ucode_hdr->ver_num; + ucode->size = ntohl(ucode_hdr->code_length) * 2; + if (!ucode->size || (fw->size < round_up(ucode->size, 16) + + sizeof(struct otx_cpt_ucode_hdr) + OTX_CPT_UCODE_SIGN_LEN)) { + dev_err(dev, "Ucode %s invalid size", ucode_filename); + ret = -EINVAL; 
+ goto release_fw; + } + + ret = get_ucode_type(ucode_hdr, &ucode->type); + if (ret) { + dev_err(dev, "Microcode %s unknown type 0x%x", ucode->filename, + ucode->type); + goto release_fw; + } + + ret = copy_ucode_to_dma_mem(dev, ucode, fw->data); + if (ret) + goto release_fw; + + print_ucode_dbg_info(ucode); +release_fw: + release_firmware(fw); + return ret; +} + +static int enable_eng_grp(struct otx_cpt_eng_grp_info *eng_grp, + void *obj) +{ + int ret; + + ret = cpt_set_ucode_base(eng_grp, obj); + if (ret) + return ret; + + ret = cpt_attach_and_enable_cores(eng_grp, obj); + return ret; +} + +static int disable_eng_grp(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp, + void *obj) +{ + int i, ret; + + ret = cpt_detach_and_disable_cores(eng_grp, obj); + if (ret) + return ret; + + /* Unload ucode used by this engine group */ + ucode_unload(dev, &eng_grp->ucode[0]); + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!eng_grp->engs[i].type) + continue; + + eng_grp->engs[i].ucode = &eng_grp->ucode[0]; + } + + ret = cpt_set_ucode_base(eng_grp, obj); + + return ret; +} + +static void setup_eng_grp_mirroring(struct otx_cpt_eng_grp_info *dst_grp, + struct otx_cpt_eng_grp_info *src_grp) +{ + /* Setup fields for engine group which is mirrored */ + src_grp->mirror.is_ena = false; + src_grp->mirror.idx = 0; + src_grp->mirror.ref_count++; + + /* Setup fields for mirroring engine group */ + dst_grp->mirror.is_ena = true; + dst_grp->mirror.idx = src_grp->idx; + dst_grp->mirror.ref_count = 0; +} + +static void remove_eng_grp_mirroring(struct otx_cpt_eng_grp_info *dst_grp) +{ + struct otx_cpt_eng_grp_info *src_grp; + + if (!dst_grp->mirror.is_ena) + return; + + src_grp = &dst_grp->g->grp[dst_grp->mirror.idx]; + + src_grp->mirror.ref_count--; + dst_grp->mirror.is_ena = false; + dst_grp->mirror.idx = 0; + dst_grp->mirror.ref_count = 0; +} + +static void update_requested_engs(struct otx_cpt_eng_grp_info *mirrored_eng_grp, + struct otx_cpt_engines *engs, int engs_cnt) +{ + struct otx_cpt_engs_rsvd *mirrored_engs; + int i; + + for (i = 0; i < engs_cnt; i++) { + mirrored_engs = find_engines_by_type(mirrored_eng_grp, + engs[i].type); + if (!mirrored_engs) + continue; + + /* + * If mirrored group has this type of engines attached then + * there are 3 scenarios possible: + * 1) mirrored_engs.count == engs[i].count then all engines + * from mirrored engine group will be shared with this engine + * group + * 2) mirrored_engs.count > engs[i].count then only a subset of + * engines from mirrored engine group will be shared with this + * engine group + * 3) mirrored_engs.count < engs[i].count then all engines + * from mirrored engine group will be shared with this group + * and additional engines will be reserved for exclusively use + * by this engine group + */ + engs[i].count -= mirrored_engs->count; + } +} + +static struct otx_cpt_eng_grp_info *find_mirrored_eng_grp( + struct otx_cpt_eng_grp_info *grp) +{ + struct otx_cpt_eng_grps *eng_grps = grp->g; + int i; + + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + if (!eng_grps->grp[i].is_enabled) + continue; + if (eng_grps->grp[i].ucode[0].type) + continue; + if (grp->idx == i) + continue; + if (!strncasecmp(eng_grps->grp[i].ucode[0].ver_str, + grp->ucode[0].ver_str, + OTX_CPT_UCODE_VER_STR_SZ)) + return &eng_grps->grp[i]; + } + + return NULL; +} + +static struct otx_cpt_eng_grp_info *find_unused_eng_grp( + struct otx_cpt_eng_grps *eng_grps) +{ + int i; + + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + if (!eng_grps->grp[i].is_enabled) + 
return &eng_grps->grp[i]; + } + return NULL; +} + +static int eng_grp_update_masks(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp) +{ + struct otx_cpt_engs_rsvd *engs, *mirrored_engs; + struct otx_cpt_bitmap tmp_bmap = { {0} }; + int i, j, cnt, max_cnt; + int bit; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + if (engs->count <= 0) + continue; + + switch (engs->type) { + case OTX_CPT_SE_TYPES: + max_cnt = eng_grp->g->avail.max_se_cnt; + break; + + case OTX_CPT_AE_TYPES: + max_cnt = eng_grp->g->avail.max_ae_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d", engs->type); + return -EINVAL; + } + + cnt = engs->count; + WARN_ON(engs->offset + max_cnt > OTX_CPT_MAX_ENGINES); + bitmap_zero(tmp_bmap.bits, eng_grp->g->engs_num); + for (j = engs->offset; j < engs->offset + max_cnt; j++) { + if (!eng_grp->g->eng_ref_cnt[j]) { + bitmap_set(tmp_bmap.bits, j, 1); + cnt--; + if (!cnt) + break; + } + } + + if (cnt) + return -ENOSPC; + + bitmap_copy(engs->bmap, tmp_bmap.bits, eng_grp->g->engs_num); + } + + if (!eng_grp->mirror.is_ena) + return 0; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + + mirrored_engs = find_engines_by_type( + &eng_grp->g->grp[eng_grp->mirror.idx], + engs->type); + WARN_ON(!mirrored_engs && engs->count <= 0); + if (!mirrored_engs) + continue; + + bitmap_copy(tmp_bmap.bits, mirrored_engs->bmap, + eng_grp->g->engs_num); + if (engs->count < 0) { + bit = find_first_bit(mirrored_engs->bmap, + eng_grp->g->engs_num); + bitmap_clear(tmp_bmap.bits, bit, -engs->count); + } + bitmap_or(engs->bmap, engs->bmap, tmp_bmap.bits, + eng_grp->g->engs_num); + } + return 0; +} + +static int delete_engine_group(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp) +{ + int i, ret; + + if (!eng_grp->is_enabled) + return -EINVAL; + + if (eng_grp->mirror.ref_count) { + dev_err(dev, "Can't delete engine_group%d as it is used by:", + eng_grp->idx); + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + if (eng_grp->g->grp[i].mirror.is_ena && + eng_grp->g->grp[i].mirror.idx == eng_grp->idx) + dev_err(dev, "engine_group%d", i); + } + return -EINVAL; + } + + /* Removing engine group mirroring if enabled */ + remove_eng_grp_mirroring(eng_grp); + + /* Disable engine group */ + ret = disable_eng_grp(dev, eng_grp, eng_grp->g->obj); + if (ret) + return ret; + + /* Release all engines held by this engine group */ + ret = release_engines(dev, eng_grp); + if (ret) + return ret; + + device_remove_file(dev, &eng_grp->info_attr); + eng_grp->is_enabled = false; + + return 0; +} + +static int validate_1_ucode_scenario(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp, + struct otx_cpt_engines *engs, int engs_cnt) +{ + int i; + + /* Verify that ucode loaded supports requested engine types */ + for (i = 0; i < engs_cnt; i++) { + if (!otx_cpt_uc_supports_eng_type(&eng_grp->ucode[0], + engs[i].type)) { + dev_err(dev, + "Microcode %s does not support %s engines", + eng_grp->ucode[0].filename, + get_eng_type_str(engs[i].type)); + return -EINVAL; + } + } + return 0; +} + +static void update_ucode_ptrs(struct otx_cpt_eng_grp_info *eng_grp) +{ + struct otx_cpt_ucode *ucode; + + if (eng_grp->mirror.is_ena) + ucode = &eng_grp->g->grp[eng_grp->mirror.idx].ucode[0]; + else + ucode = &eng_grp->ucode[0]; + WARN_ON(!eng_grp->engs[0].type); + eng_grp->engs[0].ucode = ucode; +} + +static int create_engine_group(struct device *dev, + struct otx_cpt_eng_grps 
*eng_grps, + struct otx_cpt_engines *engs, int engs_cnt, + void *ucode_data[], int ucodes_cnt, + bool use_uc_from_tar_arch) +{ + struct otx_cpt_eng_grp_info *mirrored_eng_grp; + struct tar_ucode_info_t *tar_info; + struct otx_cpt_eng_grp_info *eng_grp; + int i, ret = 0; + + if (ucodes_cnt > OTX_CPT_MAX_ETYPES_PER_GRP) + return -EINVAL; + + /* Validate if requested engine types are supported by this device */ + for (i = 0; i < engs_cnt; i++) + if (!dev_supports_eng_type(eng_grps, engs[i].type)) { + dev_err(dev, "Device does not support %s engines", + get_eng_type_str(engs[i].type)); + return -EPERM; + } + + /* Find engine group which is not used */ + eng_grp = find_unused_eng_grp(eng_grps); + if (!eng_grp) { + dev_err(dev, "Error all engine groups are being used"); + return -ENOSPC; + } + + /* Load ucode */ + for (i = 0; i < ucodes_cnt; i++) { + if (use_uc_from_tar_arch) { + tar_info = (struct tar_ucode_info_t *) ucode_data[i]; + eng_grp->ucode[i] = tar_info->ucode; + ret = copy_ucode_to_dma_mem(dev, &eng_grp->ucode[i], + tar_info->ucode_ptr); + } else + ret = ucode_load(dev, &eng_grp->ucode[i], + (char *) ucode_data[i]); + if (ret) + goto err_ucode_unload; + } + + /* Validate scenario where 1 ucode is used */ + ret = validate_1_ucode_scenario(dev, eng_grp, engs, engs_cnt); + if (ret) + goto err_ucode_unload; + + /* Check if this group mirrors another existing engine group */ + mirrored_eng_grp = find_mirrored_eng_grp(eng_grp); + if (mirrored_eng_grp) { + /* Setup mirroring */ + setup_eng_grp_mirroring(eng_grp, mirrored_eng_grp); + + /* + * Update count of requested engines because some + * of them might be shared with mirrored group + */ + update_requested_engs(mirrored_eng_grp, engs, engs_cnt); + } + + /* Reserve engines */ + ret = reserve_engines(dev, eng_grp, engs, engs_cnt); + if (ret) + goto err_ucode_unload; + + /* Update ucode pointers used by engines */ + update_ucode_ptrs(eng_grp); + + /* Update engine masks used by this group */ + ret = eng_grp_update_masks(dev, eng_grp); + if (ret) + goto err_release_engs; + + /* Create sysfs entry for engine group info */ + ret = create_sysfs_eng_grps_info(dev, eng_grp); + if (ret) + goto err_release_engs; + + /* Enable engine group */ + ret = enable_eng_grp(eng_grp, eng_grps->obj); + if (ret) + goto err_release_engs; + + /* + * If this engine group mirrors another engine group + * then we need to unload ucode as we will use ucode + * from mirrored engine group + */ + if (eng_grp->mirror.is_ena) + ucode_unload(dev, &eng_grp->ucode[0]); + + eng_grp->is_enabled = true; + if (eng_grp->mirror.is_ena) + dev_info(dev, + "Engine_group%d: reuse microcode %s from group %d", + eng_grp->idx, mirrored_eng_grp->ucode[0].ver_str, + mirrored_eng_grp->idx); + else + dev_info(dev, "Engine_group%d: microcode loaded %s", + eng_grp->idx, eng_grp->ucode[0].ver_str); + + return 0; + +err_release_engs: + release_engines(dev, eng_grp); +err_ucode_unload: + ucode_unload(dev, &eng_grp->ucode[0]); + return ret; +} + +static ssize_t ucode_load_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cpt_engines engs[OTX_CPT_MAX_ETYPES_PER_GRP] = { {0} }; + char *ucode_filename[OTX_CPT_MAX_ETYPES_PER_GRP]; + char tmp_buf[OTX_CPT_UCODE_NAME_LENGTH] = { 0 }; + char *start, *val, *err_msg, *tmp; + struct otx_cpt_eng_grps *eng_grps; + int grp_idx = 0, ret = -EINVAL; + bool has_se, has_ie, has_ae; + int del_grp_idx = -1; + int ucode_idx = 0; + + if (strlen(buf) > OTX_CPT_UCODE_NAME_LENGTH) + return -EINVAL; + + eng_grps = 
container_of(attr, struct otx_cpt_eng_grps, ucode_load_attr); + err_msg = "Invalid engine group format"; + strlcpy(tmp_buf, buf, OTX_CPT_UCODE_NAME_LENGTH); + start = tmp_buf; + + has_se = has_ie = has_ae = false; + + for (;;) { + val = strsep(&start, ";"); + if (!val) + break; + val = strim(val); + if (!*val) + continue; + + if (!strncasecmp(val, "engine_group", 12)) { + if (del_grp_idx != -1) + goto err_print; + tmp = strim(strsep(&val, ":")); + if (!val) + goto err_print; + if (strlen(tmp) != 13) + goto err_print; + if (kstrtoint((tmp + 12), 10, &del_grp_idx)) + goto err_print; + val = strim(val); + if (strncasecmp(val, "null", 4)) + goto err_print; + if (strlen(val) != 4) + goto err_print; + } else if (!strncasecmp(val, "se", 2) && strchr(val, ':')) { + if (has_se || ucode_idx) + goto err_print; + tmp = strim(strsep(&val, ":")); + if (!val) + goto err_print; + if (strlen(tmp) != 2) + goto err_print; + if (kstrtoint(strim(val), 10, &engs[grp_idx].count)) + goto err_print; + engs[grp_idx++].type = OTX_CPT_SE_TYPES; + has_se = true; + } else if (!strncasecmp(val, "ae", 2) && strchr(val, ':')) { + if (has_ae || ucode_idx) + goto err_print; + tmp = strim(strsep(&val, ":")); + if (!val) + goto err_print; + if (strlen(tmp) != 2) + goto err_print; + if (kstrtoint(strim(val), 10, &engs[grp_idx].count)) + goto err_print; + engs[grp_idx++].type = OTX_CPT_AE_TYPES; + has_ae = true; + } else { + if (ucode_idx > 1) + goto err_print; + if (!strlen(val)) + goto err_print; + if (strnstr(val, " ", strlen(val))) + goto err_print; + ucode_filename[ucode_idx++] = val; + } + } + + /* Validate input parameters */ + if (del_grp_idx == -1) { + if (!(grp_idx && ucode_idx)) + goto err_print; + + if (ucode_idx > 1 && grp_idx < 2) + goto err_print; + + if (grp_idx > OTX_CPT_MAX_ETYPES_PER_GRP) { + err_msg = "Error max 2 engine types can be attached"; + goto err_print; + } + + } else { + if (del_grp_idx < 0 || + del_grp_idx >= OTX_CPT_MAX_ENGINE_GROUPS) { + dev_err(dev, "Invalid engine group index %d", + del_grp_idx); + ret = -EINVAL; + return ret; + } + + if (!eng_grps->grp[del_grp_idx].is_enabled) { + dev_err(dev, "Error engine_group%d is not configured", + del_grp_idx); + ret = -EINVAL; + return ret; + } + + if (grp_idx || ucode_idx) + goto err_print; + } + + mutex_lock(&eng_grps->lock); + + if (eng_grps->is_rdonly) { + dev_err(dev, "Disable VFs before modifying engine groups\n"); + ret = -EACCES; + goto err_unlock; + } + + if (del_grp_idx == -1) + /* create engine group */ + ret = create_engine_group(dev, eng_grps, engs, grp_idx, + (void **) ucode_filename, + ucode_idx, false); + else + /* delete engine group */ + ret = delete_engine_group(dev, &eng_grps->grp[del_grp_idx]); + if (ret) + goto err_unlock; + + print_dbg_info(dev, eng_grps); +err_unlock: + mutex_unlock(&eng_grps->lock); + return ret ? 
ret : count; +err_print: + dev_err(dev, "%s\n", err_msg); + + return ret; +} + +int otx_cpt_try_create_default_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps, + int pf_type) +{ + struct tar_ucode_info_t *tar_info[OTX_CPT_MAX_ETYPES_PER_GRP] = { 0 }; + struct otx_cpt_engines engs[OTX_CPT_MAX_ETYPES_PER_GRP] = { {0} }; + struct tar_arch_info_t *tar_arch = NULL; + char *tar_filename; + int i, ret = 0; + + mutex_lock(&eng_grps->lock); + + /* + * We don't create engine group for kernel crypto if attempt to create + * it was already made (when user enabled VFs for the first time) + */ + if (eng_grps->is_first_try) + goto unlock_mutex; + eng_grps->is_first_try = true; + + /* We create group for kcrypto only if no groups are configured */ + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) + if (eng_grps->grp[i].is_enabled) + goto unlock_mutex; + + switch (pf_type) { + case OTX_CPT_AE: + case OTX_CPT_SE: + tar_filename = OTX_CPT_UCODE_TAR_FILE_NAME; + break; + + default: + dev_err(&pdev->dev, "Unknown PF type %d\n", pf_type); + ret = -EINVAL; + goto unlock_mutex; + } + + tar_arch = load_tar_archive(&pdev->dev, tar_filename); + if (!tar_arch) + goto unlock_mutex; + + /* + * If device supports SE engines and there is SE microcode in tar + * archive try to create engine group with SE engines for kernel + * crypto functionality (symmetric crypto) + */ + tar_info[0] = get_uc_from_tar_archive(tar_arch, OTX_CPT_SE_TYPES); + if (tar_info[0] && + dev_supports_eng_type(eng_grps, OTX_CPT_SE_TYPES)) { + + engs[0].type = OTX_CPT_SE_TYPES; + engs[0].count = eng_grps->avail.max_se_cnt; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 1, + (void **) tar_info, 1, true); + if (ret) + goto release_tar_arch; + } + /* + * If device supports AE engines and there is AE microcode in tar + * archive try to create engine group with AE engines for asymmetric + * crypto functionality. 
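+ * Note that OTX_CPT_MAX_ETYPES_PER_GRP is 1 on this platform, so the
+ * AE microcode gets an engine group of its own rather than sharing
+ * the SE group created above.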
+ */
+ tar_info[0] = get_uc_from_tar_archive(tar_arch, OTX_CPT_AE_TYPES);
+ if (tar_info[0] &&
+ dev_supports_eng_type(eng_grps, OTX_CPT_AE_TYPES)) {
+
+ engs[0].type = OTX_CPT_AE_TYPES;
+ engs[0].count = eng_grps->avail.max_ae_cnt;
+
+ ret = create_engine_group(&pdev->dev, eng_grps, engs, 1,
+ (void **) tar_info, 1, true);
+ if (ret)
+ goto release_tar_arch;
+ }
+
+ print_dbg_info(&pdev->dev, eng_grps);
+release_tar_arch:
+ release_tar_archive(tar_arch);
+unlock_mutex:
+ mutex_unlock(&eng_grps->lock);
+ return ret;
+}
+
+void otx_cpt_set_eng_grps_is_rdonly(struct otx_cpt_eng_grps *eng_grps,
+ bool is_rdonly)
+{
+ mutex_lock(&eng_grps->lock);
+
+ eng_grps->is_rdonly = is_rdonly;
+
+ mutex_unlock(&eng_grps->lock);
+}
+
+void otx_cpt_disable_all_cores(struct otx_cpt_device *cpt)
+{
+ int grp, timeout = 100;
+ u64 reg;
+
+ /* Disengage the cores from groups */
+ for (grp = 0; grp < OTX_CPT_MAX_ENGINE_GROUPS; grp++) {
+ writeq(0, cpt->reg_base + OTX_CPT_PF_GX_EN(grp));
+ udelay(CSR_DELAY);
+ }
+
+ /* Wait for cores to become idle, warn only once the timeout expires */
+ reg = readq(cpt->reg_base + OTX_CPT_PF_EXEC_BUSY);
+ while (reg) {
+ udelay(CSR_DELAY);
+ reg = readq(cpt->reg_base + OTX_CPT_PF_EXEC_BUSY);
+ if (timeout-- < 0) {
+ dev_warn(&cpt->pdev->dev, "Cores still busy");
+ break;
+ }
+ }
+
+ /* Disable the cores */
+ writeq(0, cpt->reg_base + OTX_CPT_PF_EXE_CTL);
+}
+
+void otx_cpt_cleanup_eng_grps(struct pci_dev *pdev,
+ struct otx_cpt_eng_grps *eng_grps)
+{
+ struct otx_cpt_eng_grp_info *grp;
+ int i, j;
+
+ mutex_lock(&eng_grps->lock);
+ if (eng_grps->is_ucode_load_created) {
+ device_remove_file(&pdev->dev,
+ &eng_grps->ucode_load_attr);
+ eng_grps->is_ucode_load_created = false;
+ }
+
+ /* First delete all mirroring engine groups */
+ for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++)
+ if (eng_grps->grp[i].mirror.is_ena)
+ delete_engine_group(&pdev->dev, &eng_grps->grp[i]);
+
+ /* Delete remaining engine groups */
+ for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++)
+ delete_engine_group(&pdev->dev, &eng_grps->grp[i]);
+
+ /* Release memory */
+ for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) {
+ grp = &eng_grps->grp[i];
+ for (j = 0; j < OTX_CPT_MAX_ETYPES_PER_GRP; j++) {
+ kfree(grp->engs[j].bmap);
+ grp->engs[j].bmap = NULL;
+ }
+ }
+
+ mutex_unlock(&eng_grps->lock);
+}
+
+int otx_cpt_init_eng_grps(struct pci_dev *pdev,
+ struct otx_cpt_eng_grps *eng_grps, int pf_type)
+{
+ struct otx_cpt_eng_grp_info *grp;
+ int i, j, ret = 0;
+
+ mutex_init(&eng_grps->lock);
+ eng_grps->obj = pci_get_drvdata(pdev);
+ eng_grps->avail.se_cnt = eng_grps->avail.max_se_cnt;
+ eng_grps->avail.ae_cnt = eng_grps->avail.max_ae_cnt;
+
+ eng_grps->engs_num = eng_grps->avail.max_se_cnt +
+ eng_grps->avail.max_ae_cnt;
+ if (eng_grps->engs_num > OTX_CPT_MAX_ENGINES) {
+ dev_err(&pdev->dev,
+ "Number of engines %d > than max supported %d",
+ eng_grps->engs_num, OTX_CPT_MAX_ENGINES);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) {
+ grp = &eng_grps->grp[i];
+ grp->g = eng_grps;
+ grp->idx = i;
+
+ snprintf(grp->sysfs_info_name, OTX_CPT_UCODE_NAME_LENGTH,
+ "engine_group%d", i);
+ for (j = 0; j < OTX_CPT_MAX_ETYPES_PER_GRP; j++) {
+ grp->engs[j].bmap =
+ kcalloc(BITS_TO_LONGS(eng_grps->engs_num),
+ sizeof(long), GFP_KERNEL);
+ if (!grp->engs[j].bmap) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ }
+ }
+
+ switch (pf_type) {
+ case OTX_CPT_SE:
+ /* OcteonTX 83XX SE CPT PF has only SE engines attached */
+ eng_grps->eng_types_supported = 1 << OTX_CPT_SE_TYPES;
+ break;
+
+ case OTX_CPT_AE:
+ /* OcteonTX 83XX AE CPT PF has only AE engines attached */
+
eng_grps->eng_types_supported = 1 << OTX_CPT_AE_TYPES; + break; + + default: + dev_err(&pdev->dev, "Unknown PF type %d\n", pf_type); + ret = -EINVAL; + goto err; + } + + eng_grps->ucode_load_attr.show = NULL; + eng_grps->ucode_load_attr.store = ucode_load_store; + eng_grps->ucode_load_attr.attr.name = "ucode_load"; + eng_grps->ucode_load_attr.attr.mode = 0220; + sysfs_attr_init(&eng_grps->ucode_load_attr.attr); + ret = device_create_file(&pdev->dev, + &eng_grps->ucode_load_attr); + if (ret) + goto err; + eng_grps->is_ucode_load_created = true; + + print_dbg_info(&pdev->dev, eng_grps); + return ret; +err: + otx_cpt_cleanup_eng_grps(pdev, eng_grps); + return ret; +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h new file mode 100644 index 000000000000..14f02b60d0c2 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPTPF_UCODE_H +#define __OTX_CPTPF_UCODE_H + +#include +#include +#include +#include "otx_cpt_hw_types.h" + +/* CPT ucode name maximum length */ +#define OTX_CPT_UCODE_NAME_LENGTH 64 +/* + * On OcteonTX 83xx platform, only one type of engines is allowed to be + * attached to an engine group. + */ +#define OTX_CPT_MAX_ETYPES_PER_GRP 1 + +/* Default tar archive file names */ +#define OTX_CPT_UCODE_TAR_FILE_NAME "cpt8x-mc.tar" + +/* CPT ucode alignment */ +#define OTX_CPT_UCODE_ALIGNMENT 128 + +/* CPT ucode signature size */ +#define OTX_CPT_UCODE_SIGN_LEN 256 + +/* Microcode version string length */ +#define OTX_CPT_UCODE_VER_STR_SZ 44 + +/* Maximum number of supported engines/cores on OcteonTX 83XX platform */ +#define OTX_CPT_MAX_ENGINES 64 + +#define OTX_CPT_ENGS_BITMASK_LEN (OTX_CPT_MAX_ENGINES/(BITS_PER_BYTE * \ + sizeof(unsigned long))) + +/* Microcode types */ +enum otx_cpt_ucode_type { + OTX_CPT_AE_UC_TYPE = 1, /* AE-MAIN */ + OTX_CPT_SE_UC_TYPE1 = 20, /* SE-MAIN - combination of 21 and 22 */ + OTX_CPT_SE_UC_TYPE2 = 21, /* Fast Path IPSec + AirCrypto */ + OTX_CPT_SE_UC_TYPE3 = 22, /* + * Hash + HMAC + FlexiCrypto + RNG + Full + * Feature IPSec + AirCrypto + Kasumi + */ +}; + +struct otx_cpt_bitmap { + unsigned long bits[OTX_CPT_ENGS_BITMASK_LEN]; + int size; +}; + +struct otx_cpt_engines { + int type; + int count; +}; + +/* Microcode version number */ +struct otx_cpt_ucode_ver_num { + u8 nn; + u8 xx; + u8 yy; + u8 zz; +}; + +struct otx_cpt_ucode_hdr { + struct otx_cpt_ucode_ver_num ver_num; + u8 ver_str[OTX_CPT_UCODE_VER_STR_SZ]; + u32 code_length; + u32 padding[3]; +}; + +struct otx_cpt_ucode { + u8 ver_str[OTX_CPT_UCODE_VER_STR_SZ];/* + * ucode version in readable format + */ + struct otx_cpt_ucode_ver_num ver_num;/* ucode version number */ + char filename[OTX_CPT_UCODE_NAME_LENGTH]; /* ucode filename */ + dma_addr_t dma; /* phys address of ucode image */ + dma_addr_t align_dma; /* aligned phys address of ucode image */ + void *va; /* virt address of ucode image */ + void *align_va; /* aligned virt address of ucode image */ + u32 size; /* ucode image size */ + int type; /* ucode image type SE or AE */ +}; + +struct tar_ucode_info_t { + struct list_head list; + struct otx_cpt_ucode ucode;/* microcode information */ + const u8 *ucode_ptr; 
/* pointer to microcode in tar archive */
+};
+
+/* Maximum and current number of engines available for all engine groups */
+struct otx_cpt_engs_available {
+ int max_se_cnt;
+ int max_ae_cnt;
+ int se_cnt;
+ int ae_cnt;
+};
+
+/* Engines reserved to an engine group */
+struct otx_cpt_engs_rsvd {
+ int type; /* engine type */
+ int count; /* number of engines attached */
+ int offset; /* constant offset of engine type in the bitmap */
+ unsigned long *bmap; /* attached engines bitmap */
+ struct otx_cpt_ucode *ucode; /* ucode used by these engines */
+};
+
+struct otx_cpt_mirror_info {
+ int is_ena; /*
+ * is mirroring enabled, it is set only for engine
+ * group which mirrors another engine group
+ */
+ int idx; /*
+ * index of engine group which is mirrored by this
+ * group, set only for engine group which mirrors
+ * another group
+ */
+ int ref_count; /*
+ * number of times this engine group is mirrored by
+ * other groups, this is set only for engine group
+ * which is mirrored by other group(s)
+ */
+};
+
+struct otx_cpt_eng_grp_info {
+ struct otx_cpt_eng_grps *g; /* pointer to engine_groups structure */
+ struct device_attribute info_attr; /* group info entry attr */
+ /* engines attached */
+ struct otx_cpt_engs_rsvd engs[OTX_CPT_MAX_ETYPES_PER_GRP];
+ /* Microcode information */
+ struct otx_cpt_ucode ucode[OTX_CPT_MAX_ETYPES_PER_GRP];
+ /* sysfs info entry name */
+ char sysfs_info_name[OTX_CPT_UCODE_NAME_LENGTH];
+ /* engine group mirroring information */
+ struct otx_cpt_mirror_info mirror;
+ int idx; /* engine group index */
+ bool is_enabled; /*
+ * is engine group enabled, engine group is enabled
+ * when it has engines attached and ucode loaded
+ */
+};
+
+struct otx_cpt_eng_grps {
+ struct otx_cpt_eng_grp_info grp[OTX_CPT_MAX_ENGINE_GROUPS];
+ struct device_attribute ucode_load_attr;/* ucode load attr */
+ struct otx_cpt_engs_available avail;
+ struct mutex lock;
+ void *obj;
+ int engs_num; /* total number of engines supported */
+ int eng_types_supported; /* engine types supported SE, AE */
+ u8 eng_ref_cnt[OTX_CPT_MAX_ENGINES];/* engines reference count */
+ bool is_ucode_load_created; /* is ucode_load sysfs entry created */
+ bool is_first_try; /* is this first try to create kcrypto engine grp */
+ bool is_rdonly; /* is engine groups configuration read-only */
+};
+
+int otx_cpt_init_eng_grps(struct pci_dev *pdev,
+ struct otx_cpt_eng_grps *eng_grps, int pf_type);
+void otx_cpt_cleanup_eng_grps(struct pci_dev *pdev,
+ struct otx_cpt_eng_grps *eng_grps);
+int otx_cpt_try_create_default_eng_grps(struct pci_dev *pdev,
+ struct otx_cpt_eng_grps *eng_grps,
+ int pf_type);
+void otx_cpt_set_eng_grps_is_rdonly(struct otx_cpt_eng_grps *eng_grps,
+ bool is_rdonly);
+int otx_cpt_uc_supports_eng_type(struct otx_cpt_ucode *ucode, int eng_type);
+int otx_cpt_eng_grp_has_eng_type(struct otx_cpt_eng_grp_info *eng_grp,
+ int eng_type);
+
+#endif /* __OTX_CPTPF_UCODE_H */

From 10b4f09491bfeb0b298cb2f49df585510ee6189a Mon Sep 17 00:00:00 2001
From: SrujanaChalla
Date: Fri, 13 Mar 2020 17:17:07 +0530
Subject: [PATCH 148/162] crypto: marvell - add the Virtual Function driver
 for CPT

Add support for the cryptographic accelerator unit virtual functions on
OcteonTX 83XX SoC.
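The PF-side mailbox handler added by the previous patch answers
OTX_CPT_MSG_READY by writing the VF's number into the data word of the
same mailbox pair. A minimal sketch of the matching VF-side handshake,
for illustration only: cptvf_query_vfid is a hypothetical helper, it
assumes the VF's BAR0 is mapped at reg_base, and it polls instead of
using the mailbox interrupt the real driver relies on:

	static int cptvf_query_vfid(void __iomem *reg_base)
	{
		u64 vfid;

		/* Park an impossible VF number in the data word, then raise READY */
		writeq(~0ull, reg_base + OTX_CPT_VFX_PF_MBOXX(0, 1));
		writeq(OTX_CPT_MSG_READY, reg_base + OTX_CPT_VFX_PF_MBOXX(0, 0));

		/* The PF's READY reply rewrites the data word with the VF number */
		while ((vfid = readq(reg_base + OTX_CPT_VFX_PF_MBOXX(0, 1))) == ~0ull)
			cpu_relax();

		return (int)vfid;
	}

The OTX_CPT_VFX_PF_MBOXX offsets are the ones this patch adds to
otx_cpt_hw_types.h below; word 0 carries the message and word 1 the
data, mirroring the PF convention.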
Co-developed-by: Lukasz Bartosik Signed-off-by: Lukasz Bartosik Signed-off-by: SrujanaChalla Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx/Makefile | 4 +- .../marvell/octeontx/otx_cpt_hw_types.h | 577 +++++- drivers/crypto/marvell/octeontx/otx_cptvf.h | 104 + .../crypto/marvell/octeontx/otx_cptvf_algs.c | 1744 +++++++++++++++++ .../crypto/marvell/octeontx/otx_cptvf_algs.h | 188 ++ .../crypto/marvell/octeontx/otx_cptvf_main.c | 985 ++++++++++ .../crypto/marvell/octeontx/otx_cptvf_mbox.c | 247 +++ .../marvell/octeontx/otx_cptvf_reqmgr.c | 612 ++++++ .../marvell/octeontx/otx_cptvf_reqmgr.h | 227 +++ 9 files changed, 4686 insertions(+), 2 deletions(-) create mode 100644 drivers/crypto/marvell/octeontx/otx_cptvf.h create mode 100644 drivers/crypto/marvell/octeontx/otx_cptvf_algs.c create mode 100644 drivers/crypto/marvell/octeontx/otx_cptvf_algs.h create mode 100644 drivers/crypto/marvell/octeontx/otx_cptvf_main.c create mode 100644 drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c create mode 100644 drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c create mode 100644 drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h diff --git a/drivers/crypto/marvell/octeontx/Makefile b/drivers/crypto/marvell/octeontx/Makefile index 627d00eb9b3b..5e956fe1a85b 100644 --- a/drivers/crypto/marvell/octeontx/Makefile +++ b/drivers/crypto/marvell/octeontx/Makefile @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_CRYPTO_DEV_OCTEONTX_CPT) += octeontx-cpt.o +obj-$(CONFIG_CRYPTO_DEV_OCTEONTX_CPT) += octeontx-cpt.o octeontx-cptvf.o octeontx-cpt-objs := otx_cptpf_main.o otx_cptpf_mbox.o otx_cptpf_ucode.o +octeontx-cptvf-objs := otx_cptvf_main.o otx_cptvf_mbox.o otx_cptvf_reqmgr.o \ + otx_cptvf_algs.o diff --git a/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h index bec483fbfefd..b8bdb9f134f3 100644 --- a/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h +++ b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h @@ -15,11 +15,19 @@ /* Device IDs */ #define OTX_CPT_PCI_PF_DEVICE_ID 0xa040 +#define OTX_CPT_PCI_VF_DEVICE_ID 0xa041 #define OTX_CPT_PCI_PF_SUBSYS_ID 0xa340 +#define OTX_CPT_PCI_VF_SUBSYS_ID 0xa341 /* Configuration and status registers are in BAR0 on OcteonTX platform */ #define OTX_CPT_PF_PCI_CFG_BAR 0 +#define OTX_CPT_VF_PCI_CFG_BAR 0 + +#define OTX_CPT_BAR_E_CPTX_VFX_BAR0_OFFSET(a, b) \ + (0x000020000000ll + 0x1000000000ll * (a) + 0x100000ll * (b)) +#define OTX_CPT_BAR_E_CPTX_VFX_BAR0_SIZE 0x400000 + /* Mailbox interrupts offset */ #define OTX_CPT_PF_MBOX_INT 3 #define OTX_CPT_PF_INT_VEC_E_MBOXX(x, a) ((x) + (a)) @@ -28,6 +36,19 @@ /* Maximum supported microcode groups */ #define OTX_CPT_MAX_ENGINE_GROUPS 8 +/* CPT instruction size in bytes */ +#define OTX_CPT_INST_SIZE 64 +/* CPT queue next chunk pointer size in bytes */ +#define OTX_CPT_NEXT_CHUNK_PTR_SIZE 8 + +/* OcteonTX CPT VF MSIX vectors and their offsets */ +#define OTX_CPT_VF_MSIX_VECTORS 2 +#define OTX_CPT_VF_INTR_MBOX_MASK BIT(0) +#define OTX_CPT_VF_INTR_DOVF_MASK BIT(1) +#define OTX_CPT_VF_INTR_IRDE_MASK BIT(2) +#define OTX_CPT_VF_INTR_NWRP_MASK BIT(3) +#define OTX_CPT_VF_INTR_SERR_MASK BIT(4) + /* OcteonTX CPT PF registers */ #define OTX_CPT_PF_CONSTANTS (0x0ll) #define OTX_CPT_PF_RESET (0x100ll) @@ -78,6 +99,190 @@ #define OTX_CPT_PF_VFX_MBOXX(b, c) (0x8001000ll | (u64)(b) << 20 | \ (u64)(c) << 8) +/* OcteonTX CPT VF registers */ +#define OTX_CPT_VQX_CTL(b) (0x100ll | (u64)(b) << 20) +#define OTX_CPT_VQX_SADDR(b) (0x200ll | (u64)(b) << 20) +#define 
OTX_CPT_VQX_DONE_WAIT(b) (0x400ll | (u64)(b) << 20) +#define OTX_CPT_VQX_INPROG(b) (0x410ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE(b) (0x420ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_ACK(b) (0x440ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_INT_W1S(b) (0x460ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_INT_W1C(b) (0x468ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_ENA_W1S(b) (0x470ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_ENA_W1C(b) (0x478ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_INT(b) (0x500ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_INT_W1S(b) (0x508ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_ENA_W1S(b) (0x510ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_ENA_W1C(b) (0x518ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DOORBELL(b) (0x600ll | (u64)(b) << 20) +#define OTX_CPT_VFX_PF_MBOXX(b, c) (0x1000ll | ((b) << 20) | ((c) << 3)) + +/* + * Enumeration otx_cpt_ucode_error_code_e + * + * Enumerates ucode errors + */ +enum otx_cpt_ucode_error_code_e { + CPT_NO_UCODE_ERROR = 0x00, + ERR_OPCODE_UNSUPPORTED = 0x01, + + /* Scatter gather */ + ERR_SCATTER_GATHER_WRITE_LENGTH = 0x02, + ERR_SCATTER_GATHER_LIST = 0x03, + ERR_SCATTER_GATHER_NOT_SUPPORTED = 0x04, + +}; + +/* + * Enumeration otx_cpt_comp_e + * + * CPT OcteonTX Completion Enumeration + * Enumerates the values of CPT_RES_S[COMPCODE]. + */ +enum otx_cpt_comp_e { + CPT_COMP_E_NOTDONE = 0x00, + CPT_COMP_E_GOOD = 0x01, + CPT_COMP_E_FAULT = 0x02, + CPT_COMP_E_SWERR = 0x03, + CPT_COMP_E_HWERR = 0x04, + CPT_COMP_E_LAST_ENTRY = 0x05 +}; + +/* + * Enumeration otx_cpt_vf_int_vec_e + * + * CPT OcteonTX VF MSI-X Vector Enumeration + * Enumerates the MSI-X interrupt vectors. + */ +enum otx_cpt_vf_int_vec_e { + CPT_VF_INT_VEC_E_MISC = 0x00, + CPT_VF_INT_VEC_E_DONE = 0x01 +}; + +/* + * Structure cpt_inst_s + * + * CPT Instruction Structure + * This structure specifies the instruction layout. Instructions are + * stored in memory as little-endian unless CPT()_PF_Q()_CTL[INST_BE] is set. + * cpt_inst_s_s + * Word 0 + * doneint:1 Done interrupt. + * 0 = No interrupts related to this instruction. + * 1 = When the instruction completes, CPT()_VQ()_DONE[DONE] will be + * incremented,and based on the rules described there an interrupt may + * occur. + * Word 1 + * res_addr [127: 64] Result IOVA. + * If nonzero, specifies where to write CPT_RES_S. + * If zero, no result structure will be written. + * Address must be 16-byte aligned. + * Bits <63:49> are ignored by hardware; software should use a + * sign-extended bit <48> for forward compatibility. + * Word 2 + * grp:10 [171:162] If [WQ_PTR] is nonzero, the SSO guest-group to use when + * CPT submits work SSO. + * For the SSO to not discard the add-work request, FPA_PF_MAP() must map + * [GRP] and CPT()_PF_Q()_GMCTL[GMID] as valid. + * tt:2 [161:160] If [WQ_PTR] is nonzero, the SSO tag type to use when CPT + * submits work to SSO + * tag:32 [159:128] If [WQ_PTR] is nonzero, the SSO tag to use when CPT + * submits work to SSO. + * Word 3 + * wq_ptr [255:192] If [WQ_PTR] is nonzero, it is a pointer to a + * work-queue entry that CPT submits work to SSO after all context, + * output data, and result write operations are visible to other + * CNXXXX units and the cores. Bits <2:0> must be zero. + * Bits <63:49> are ignored by hardware; software should + * use a sign-extended bit <48> for forward compatibility. + * Internal: + * Bits <63:49>, <2:0> are ignored by hardware, treated as always 0x0. + * Word 4 + * ei0; [319:256] Engine instruction word 0. Passed to the AE/SE. 
+ * Word 5
+ * ei1; [383:320] Engine instruction word 1. Passed to the AE/SE.
+ * Word 6
+ * ei2; [447:384] Engine instruction word 2. Passed to the AE/SE.
+ * Word 7
+ * ei3; [511:448] Engine instruction word 3. Passed to the AE/SE.
+ *
+ */
+union otx_cpt_inst_s {
+	u64 u[8];
+
+	struct {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+		u64 reserved_17_63:47;
+		u64 doneint:1;
+		u64 reserved_0_15:16;
+#else /* Word 0 - Little Endian */
+		u64 reserved_0_15:16;
+		u64 doneint:1;
+		u64 reserved_17_63:47;
+#endif /* Word 0 - End */
+		u64 res_addr;
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 2 - Big Endian */
+		u64 reserved_172_191:20;
+		u64 grp:10;
+		u64 tt:2;
+		u64 tag:32;
+#else /* Word 2 - Little Endian */
+		u64 tag:32;
+		u64 tt:2;
+		u64 grp:10;
+		u64 reserved_172_191:20;
+#endif /* Word 2 - End */
+		u64 wq_ptr;
+		u64 ei0;
+		u64 ei1;
+		u64 ei2;
+		u64 ei3;
+	} s;
+};
+
+/*
+ * Structure cpt_res_s
+ *
+ * CPT Result Structure
+ * The CPT coprocessor writes the result structure after it completes a
+ * CPT_INST_S instruction. The result structure is exactly 16 bytes, and
+ * each instruction completion produces exactly one result structure.
+ *
+ * This structure is stored in memory as little-endian unless
+ * CPT()_PF_Q()_CTL[INST_BE] is set.
+ * cpt_res_s_s
+ * Word 0
+ * doneint:1 [16:16] Done interrupt. This bit is copied from the
+ * corresponding instruction's CPT_INST_S[DONEINT].
+ * compcode:8 [7:0] Indicates completion/error status of the CPT coprocessor
+ * for the associated instruction, as enumerated by CPT_COMP_E.
+ * Core software may write the memory location containing [COMPCODE] to
+ * 0x0 before ringing the doorbell, and then poll for completion by
+ * checking for a nonzero value.
+ * Once the core observes a nonzero [COMPCODE] value in this case, the CPT
+ * coprocessor will have also completed L2/DRAM write operations.
+ * Word 1
+ * reserved
+ *
+ */
+union otx_cpt_res_s {
+	u64 u[2];
+	struct {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+		u64 reserved_17_63:47;
+		u64 doneint:1;
+		u64 reserved_8_15:8;
+		u64 compcode:8;
+#else /* Word 0 - Little Endian */
+		u64 compcode:8;
+		u64 reserved_8_15:8;
+		u64 doneint:1;
+		u64 reserved_17_63:47;
+#endif /* Word 0 - End */
+		u64 reserved_64_127;
+	} s;
+};
+
 /*
  * Register (NCB) otx_cpt#_pf_bist_status
  *
@@ -246,4 +451,374 @@ union otx_cptx_pf_qx_ctl {
 #endif /* Word 0 - End */
 	} s;
 };
-#endif /* __OTX_CPT_HW_TYPES_H */
+
+/*
+ * Register (NCB) otx_cpt#_vq#_saddr
+ *
+ * CPT Queue Starting Buffer Address Registers
+ * These registers set the instruction buffer starting address.
+ * otx_cptx_vqx_saddr_s
+ * Word0
+ * reserved_49_63:15 [63:49] Reserved.
+ * ptr:43 [48:6](R/W/H) Instruction buffer IOVA <48:6> (64-byte aligned).
+ * When written, it is the initial buffer starting address; when read,
+ * it is the next read pointer to be requested from L2C. The PTR field
+ * is overwritten with the next pointer each time that the command buffer
+ * segment is exhausted. New commands will then be read from the newly
+ * specified command buffer pointer.
+ * reserved_0_5:6 [5:0] Reserved.
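+ *
+ * Worked example (illustrative, not from the hardware spec): for a
+ * 64-byte-aligned command buffer chunk at IOVA 0x100000040, software
+ * programs PTR = 0x100000040 >> 6 = 0x4000001; hardware then rewrites
+ * PTR with the next chunk pointer each time a segment is exhausted.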
+ *
+ */
+union otx_cptx_vqx_saddr {
+	u64 u;
+	struct otx_cptx_vqx_saddr_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+		u64 reserved_49_63:15;
+		u64 ptr:43;
+		u64 reserved_0_5:6;
+#else /* Word 0 - Little Endian */
+		u64 reserved_0_5:6;
+		u64 ptr:43;
+		u64 reserved_49_63:15;
+#endif /* Word 0 - End */
+	} s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_vq#_misc_ena_w1s
+ *
+ * CPT Queue Misc Interrupt Enable Set Register
+ * This register sets interrupt enable bits.
+ * otx_cptx_vqx_misc_ena_w1s_s
+ * Word0
+ * reserved_5_63:59 [63:5] Reserved.
+ * swerr:1 [4:4](R/W1S/H) Reads or sets enable for
+ * CPT(0..1)_VQ(0..63)_MISC_INT[SWERR].
+ * nwrp:1 [3:3](R/W1S/H) Reads or sets enable for
+ * CPT(0..1)_VQ(0..63)_MISC_INT[NWRP].
+ * irde:1 [2:2](R/W1S/H) Reads or sets enable for
+ * CPT(0..1)_VQ(0..63)_MISC_INT[IRDE].
+ * dovf:1 [1:1](R/W1S/H) Reads or sets enable for
+ * CPT(0..1)_VQ(0..63)_MISC_INT[DOVF].
+ * mbox:1 [0:0](R/W1S/H) Reads or sets enable for
+ * CPT(0..1)_VQ(0..63)_MISC_INT[MBOX].
+ *
+ */
+union otx_cptx_vqx_misc_ena_w1s {
+	u64 u;
+	struct otx_cptx_vqx_misc_ena_w1s_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+		u64 reserved_5_63:59;
+		u64 swerr:1;
+		u64 nwrp:1;
+		u64 irde:1;
+		u64 dovf:1;
+		u64 mbox:1;
+#else /* Word 0 - Little Endian */
+		u64 mbox:1;
+		u64 dovf:1;
+		u64 irde:1;
+		u64 nwrp:1;
+		u64 swerr:1;
+		u64 reserved_5_63:59;
+#endif /* Word 0 - End */
+	} s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_vq#_doorbell
+ *
+ * CPT Queue Doorbell Registers
+ * Doorbells for the CPT instruction queues.
+ * otx_cptx_vqx_doorbell_s
+ * Word0
+ * reserved_20_63:44 [63:20] Reserved.
+ * dbell_cnt:20 [19:0](R/W/H) Number of instruction queue 64-bit words to add
+ * to the CPT instruction doorbell count. Readback value is the
+ * current number of pending doorbell requests. If the counter overflows,
+ * CPT()_VQ()_MISC_INT[DBELL_DOVF] is set. To reset the count back to
+ * zero, write one to clear CPT()_VQ()_MISC_INT_ENA_W1C[DBELL_DOVF],
+ * then write a value of 2^20 minus the read [DBELL_CNT], then write one
+ * to CPT()_VQ()_MISC_INT_W1C[DBELL_DOVF] and
+ * CPT()_VQ()_MISC_INT_ENA_W1S[DBELL_DOVF]. Must be a multiple of 8.
+ * All CPT instructions are 8 words and require a doorbell count that is
+ * a multiple of 8.
+ */
+union otx_cptx_vqx_doorbell {
+	u64 u;
+	struct otx_cptx_vqx_doorbell_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+		u64 reserved_20_63:44;
+		u64 dbell_cnt:20;
+#else /* Word 0 - Little Endian */
+		u64 dbell_cnt:20;
+		u64 reserved_20_63:44;
+#endif /* Word 0 - End */
+	} s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_vq#_inprog
+ *
+ * CPT Queue In Progress Count Registers
+ * These registers contain the per-queue instruction in flight registers.
+ * otx_cptx_vqx_inprog_s
+ * Word0
+ * reserved_8_63:56 [63:8] Reserved.
+ * inflight:8 [7:0](RO/H) Inflight count. Counts the number of instructions
+ * for the VF for which CPT is fetching, executing or responding to
+ * instructions. However this does not include any interrupts that are
+ * awaiting software handling (CPT()_VQ()_DONE[DONE] != 0x0).
+ * A queue may not be reconfigured until:
+ * 1. CPT()_VQ()_CTL[ENA] is cleared by software.
+ * 2. [INFLIGHT] is polled until it is equal to zero.
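+ *
+ * Illustrative quiescing sketch (not part of this file; readq/writeq
+ * wrappers over the VF BAR0 mapping are assumed):
+ *
+ *   writeq(0, base + OTX_CPT_VQX_CTL(0));        - clear ENA
+ *   while (readq(base + OTX_CPT_VQX_INPROG(0)) & 0xff)
+ *           cpu_relax();                         - wait for INFLIGHT == 0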
+ */
+union otx_cptx_vqx_inprog {
+	u64 u;
+	struct otx_cptx_vqx_inprog_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+		u64 reserved_8_63:56;
+		u64 inflight:8;
+#else /* Word 0 - Little Endian */
+		u64 inflight:8;
+		u64 reserved_8_63:56;
+#endif /* Word 0 - End */
+	} s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_vq#_misc_int
+ *
+ * CPT Queue Misc Interrupt Register
+ * These registers contain the per-queue miscellaneous interrupts.
+ * otx_cptx_vqx_misc_int_s
+ * Word 0
+ * reserved_5_63:59 [63:5] Reserved.
+ * swerr:1 [4:4](R/W1C/H) Software error from engines.
+ * nwrp:1 [3:3](R/W1C/H) NCB result write response error.
+ * irde:1 [2:2](R/W1C/H) Instruction NCB read response error.
+ * dovf:1 [1:1](R/W1C/H) Doorbell overflow.
+ * mbox:1 [0:0](R/W1C/H) PF to VF mailbox interrupt. Set when
+ * CPT()_VF()_PF_MBOX(0) is written.
+ *
+ */
+union otx_cptx_vqx_misc_int {
+	u64 u;
+	struct otx_cptx_vqx_misc_int_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+		u64 reserved_5_63:59;
+		u64 swerr:1;
+		u64 nwrp:1;
+		u64 irde:1;
+		u64 dovf:1;
+		u64 mbox:1;
+#else /* Word 0 - Little Endian */
+		u64 mbox:1;
+		u64 dovf:1;
+		u64 irde:1;
+		u64 nwrp:1;
+		u64 swerr:1;
+		u64 reserved_5_63:59;
+#endif /* Word 0 - End */
+	} s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_vq#_done_ack
+ *
+ * CPT Queue Done Count Ack Registers
+ * This register is written by software to acknowledge interrupts.
+ * otx_cptx_vqx_done_ack_s
+ * Word0
+ * reserved_20_63:44 [63:20] Reserved.
+ * done_ack:20 [19:0](R/W/H) Number of decrements to CPT()_VQ()_DONE[DONE].
+ * Reads CPT()_VQ()_DONE[DONE]. Written by software to acknowledge
+ * interrupts. If CPT()_VQ()_DONE[DONE] is still nonzero, the interrupt
+ * will be re-sent if the conditions described in CPT()_VQ()_DONE[DONE]
+ * are satisfied.
+ *
+ */
+union otx_cptx_vqx_done_ack {
+	u64 u;
+	struct otx_cptx_vqx_done_ack_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+		u64 reserved_20_63:44;
+		u64 done_ack:20;
+#else /* Word 0 - Little Endian */
+		u64 done_ack:20;
+		u64 reserved_20_63:44;
+#endif /* Word 0 - End */
+	} s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_vq#_done
+ *
+ * CPT Queue Done Count Registers
+ * These registers contain the per-queue instruction done count.
+ * cptx_vqx_done_s
+ * Word0
+ * reserved_20_63:44 [63:20] Reserved.
+ * done:20 [19:0](R/W/H) Done count. When CPT_INST_S[DONEINT] is set and that
+ * instruction completes, CPT()_VQ()_DONE[DONE] is incremented.
+ * Writes to this field are for diagnostic use only;
+ * instead software writes CPT()_VQ()_DONE_ACK with the number of
+ * decrements for this field.
+ * Interrupts are sent as follows:
+ * * When CPT()_VQ()_DONE[DONE] = 0, then no results are pending, the
+ * interrupt coalescing timer is held to zero, and an interrupt is not
+ * sent.
+ * * When CPT()_VQ()_DONE[DONE] != 0, then the interrupt coalescing timer
+ * counts. If the counter is >= CPT()_VQ()_DONE_WAIT[TIME_WAIT]*1024, or
+ * CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT], i.e. enough
+ * time has passed or enough results have arrived, then the interrupt is
+ * sent.
+ * * When CPT()_VQ()_DONE_ACK is written (or CPT()_VQ()_DONE is written,
+ * but this is not typical), the interrupt coalescing timer restarts.
+ * Note that after decrementing, this interrupt equation is recomputed;
+ * for example, if CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT]
+ * and the timer is zero, the interrupt will be resent immediately.
+ * (This covers the race case between software acknowledging an interrupt
+ * and a result returning.)
+ * * When CPT()_VQ()_DONE_ENA_W1S[DONE] = 0, interrupts are not sent,
+ * but the counting described above still occurs.
+ * Since CPT instructions complete out-of-order, if software is using
+ * completion interrupts the suggested scheme is to request a DONEINT on
+ * each request, and when an interrupt arrives perform a "greedy" scan for
+ * completions; even if a later command is acknowledged first this will
+ * not result in missing a completion.
+ * Software is responsible for making sure [DONE] does not overflow;
+ * for example by ensuring there are not more than 2^20-1 instructions in
+ * flight that may request interrupts.
+ *
+ */
+union otx_cptx_vqx_done {
+	u64 u;
+	struct otx_cptx_vqx_done_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+		u64 reserved_20_63:44;
+		u64 done:20;
+#else /* Word 0 - Little Endian */
+		u64 done:20;
+		u64 reserved_20_63:44;
+#endif /* Word 0 - End */
+	} s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_vq#_done_wait
+ *
+ * CPT Queue Done Interrupt Coalescing Wait Registers
+ * Specifies the per queue interrupt coalescing settings.
+ * cptx_vqx_done_wait_s
+ * Word0
+ * reserved_48_63:16 [63:48] Reserved.
+ * time_wait:16 [47:32](R/W) Time hold-off. When CPT()_VQ()_DONE[DONE] = 0
+ * or CPT()_VQ()_DONE_ACK is written a timer is cleared. When the timer
+ * reaches [TIME_WAIT]*1024 then interrupt coalescing ends; see
+ * CPT()_VQ()_DONE[DONE]. If 0x0, time coalescing is disabled.
+ * reserved_20_31:12 [31:20] Reserved.
+ * num_wait:20 [19:0](R/W) Number of messages hold-off.
+ * When CPT()_VQ()_DONE[DONE] >= [NUM_WAIT] then interrupt coalescing ends;
+ * see CPT()_VQ()_DONE[DONE]. If 0x0, same behavior as 0x1.
+ *
+ */
+union otx_cptx_vqx_done_wait {
+	u64 u;
+	struct otx_cptx_vqx_done_wait_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+		u64 reserved_48_63:16;
+		u64 time_wait:16;
+		u64 reserved_20_31:12;
+		u64 num_wait:20;
+#else /* Word 0 - Little Endian */
+		u64 num_wait:20;
+		u64 reserved_20_31:12;
+		u64 time_wait:16;
+		u64 reserved_48_63:16;
+#endif /* Word 0 - End */
+	} s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_vq#_done_ena_w1s
+ *
+ * CPT Queue Done Interrupt Enable Set Registers
+ * Writing 1 to these registers will enable the DONEINT interrupt for the queue.
+ * cptx_vqx_done_ena_w1s_s
+ * Word0
+ * reserved_1_63:63 [63:1] Reserved.
+ * done:1 [0:0](R/W1S/H) Write 1 will enable DONEINT for this queue.
+ * Write 0 has no effect. Read will return the enable bit.
+ */
+union otx_cptx_vqx_done_ena_w1s {
+	u64 u;
+	struct otx_cptx_vqx_done_ena_w1s_s {
+#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */
+		u64 reserved_1_63:63;
+		u64 done:1;
+#else /* Word 0 - Little Endian */
+		u64 done:1;
+		u64 reserved_1_63:63;
+#endif /* Word 0 - End */
+	} s;
+};
+
+/*
+ * Register (NCB) otx_cpt#_vq#_ctl
+ *
+ * CPT VF Queue Control Registers
+ * This register configures queues. This register should be changed (other than
+ * clearing [ENA]) only when quiescent (see CPT()_VQ()_INPROG[INFLIGHT]).
+ * cptx_vqx_ctl_s
+ * Word0
+ * reserved_1_63:63 [63:1] Reserved.
+ * ena:1 [0:0](R/W/H) Enables the logical instruction queue.
+ * See also CPT()_PF_Q()_CTL[CONT_ERR] and CPT()_VQ()_INPROG[INFLIGHT].
+ * 1 = Queue is enabled.
+ * 0 = Queue is disabled.
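+ *
+ * Illustrative enable/submit pairing (a sketch; the writeq wrapper and
+ * BAR0 base are assumed, and the queue must already be programmed via
+ * CPT()_VQ()_SADDR):
+ *
+ *   writeq(1, base + OTX_CPT_VQX_CTL(0));       - set ENA
+ *   writeq(8, base + OTX_CPT_VQX_DOORBELL(0));  - one 64-byte instruction,
+ *                                                 i.e. 8 x 64-bit words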
+ */ +union otx_cptx_vqx_ctl { + u64 u; + struct otx_cptx_vqx_ctl_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_1_63:63; + u64 ena:1; +#else /* Word 0 - Little Endian */ + u64 ena:1; + u64 reserved_1_63:63; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Error Address/Error Codes + * + * In the event of a severe error, microcode writes an 8-byte Error Code + * value (ECODE) to host memory at the Rptr address specified by the host + * system (in the 64-byte request). + * + * Word0 + * [63:56](R) 8-bit completion code + * [55:48](R) Number of the core that reported the severe error + * [47:0] Lower 6 bytes of M-Inst word2. Used to assist in uniquely + * identifying which specific instruction caused the error. This assumes + * that each instruction has a unique result location (RPTR), at least + * for a given period of time. + */ +union otx_cpt_error_code { + u64 u; + struct otx_cpt_error_code_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + uint64_t ccode:8; + uint64_t coreid:8; + uint64_t rptr6:48; +#else /* Word 0 - Little Endian */ + uint64_t rptr6:48; + uint64_t coreid:8; + uint64_t ccode:8; +#endif /* Word 0 - End */ + } s; +}; + +#endif /*__OTX_CPT_HW_TYPES_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf.h b/drivers/crypto/marvell/octeontx/otx_cptvf.h new file mode 100644 index 000000000000..dd02f21659af --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __OTX_CPTVF_H +#define __OTX_CPTVF_H + +#include +#include +#include +#include "otx_cpt_common.h" +#include "otx_cptvf_reqmgr.h" + +/* Flags to indicate the features supported */ +#define OTX_CPT_FLAG_DEVICE_READY BIT(1) +#define otx_cpt_device_ready(cpt) ((cpt)->flags & OTX_CPT_FLAG_DEVICE_READY) +/* Default command queue length */ +#define OTX_CPT_CMD_QLEN (4*2046) +#define OTX_CPT_CMD_QCHUNK_SIZE 1023 +#define OTX_CPT_NUM_QS_PER_VF 1 + +struct otx_cpt_cmd_chunk { + u8 *head; + dma_addr_t dma_addr; + u32 size; /* Chunk size, max OTX_CPT_INST_CHUNK_MAX_SIZE */ + struct list_head nextchunk; +}; + +struct otx_cpt_cmd_queue { + u32 idx; /* Command queue host write idx */ + u32 num_chunks; /* Number of command chunks */ + struct otx_cpt_cmd_chunk *qhead;/* + * Command queue head, instructions + * are inserted here + */ + struct otx_cpt_cmd_chunk *base; + struct list_head chead; +}; + +struct otx_cpt_cmd_qinfo { + u32 qchunksize; /* Command queue chunk size */ + struct otx_cpt_cmd_queue queue[OTX_CPT_NUM_QS_PER_VF]; +}; + +struct otx_cpt_pending_qinfo { + u32 num_queues; /* Number of queues supported */ + struct otx_cpt_pending_queue queue[OTX_CPT_NUM_QS_PER_VF]; +}; + +#define for_each_pending_queue(qinfo, q, i) \ + for (i = 0, q = &qinfo->queue[i]; i < qinfo->num_queues; i++, \ + q = &qinfo->queue[i]) + +struct otx_cptvf_wqe { + struct tasklet_struct twork; + struct otx_cptvf *cptvf; +}; + +struct otx_cptvf_wqe_info { + struct otx_cptvf_wqe vq_wqe[OTX_CPT_NUM_QS_PER_VF]; +}; + +struct otx_cptvf { + u16 flags; /* Flags to hold device status bits */ + u8 vfid; /* Device Index 0...OTX_CPT_MAX_VF_NUM */ + u8 num_vfs; /* Number of enabled VFs */ + u8 vftype; /* VF type of SE_TYPE(2) or AE_TYPE(1) */ + u8 vfgrp; /* VF group (0 - 8) */ + u8 node; /* Operating node: Bits (46:44) in BAR0 address */ + u8 priority; /* + * VF priority ring: 1-High proirity round + * robin ring;0-Low priority round robin ring; + */ + struct pci_dev *pdev; /* Pci device handle */ + void __iomem *reg_base; /* Register start address */ + void *wqe_info; /* BH worker info */ + /* MSI-X */ + cpumask_var_t affinity_mask[OTX_CPT_VF_MSIX_VECTORS]; + /* Command and Pending queues */ + u32 qsize; + u32 num_queues; + struct otx_cpt_cmd_qinfo cqinfo; /* Command queue information */ + struct otx_cpt_pending_qinfo pqinfo; /* Pending queue information */ + /* VF-PF mailbox communication */ + bool pf_acked; + bool pf_nacked; +}; + +int otx_cptvf_send_vf_up(struct otx_cptvf *cptvf); +int otx_cptvf_send_vf_down(struct otx_cptvf *cptvf); +int otx_cptvf_send_vf_to_grp_msg(struct otx_cptvf *cptvf, int group); +int otx_cptvf_send_vf_priority_msg(struct otx_cptvf *cptvf); +int otx_cptvf_send_vq_size_msg(struct otx_cptvf *cptvf); +int otx_cptvf_check_pf_ready(struct otx_cptvf *cptvf); +void otx_cptvf_handle_mbox_intr(struct otx_cptvf *cptvf); +void otx_cptvf_write_vq_doorbell(struct otx_cptvf *cptvf, u32 val); + +#endif /* __OTX_CPTVF_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c new file mode 100644 index 000000000000..946fb62949b2 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c @@ -0,0 +1,1744 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "otx_cptvf.h" +#include "otx_cptvf_algs.h" +#include "otx_cptvf_reqmgr.h" + +#define CPT_MAX_VF_NUM 64 +/* Size of salt in AES GCM mode */ +#define AES_GCM_SALT_SIZE 4 +/* Size of IV in AES GCM mode */ +#define AES_GCM_IV_SIZE 8 +/* Size of ICV (Integrity Check Value) in AES GCM mode */ +#define AES_GCM_ICV_SIZE 16 +/* Offset of IV in AES GCM mode */ +#define AES_GCM_IV_OFFSET 8 +#define CONTROL_WORD_LEN 8 +#define KEY2_OFFSET 48 +#define DMA_MODE_FLAG(dma_mode) \ + (((dma_mode) == OTX_CPT_DMA_GATHER_SCATTER) ? (1 << 7) : 0) + +/* Truncated SHA digest size */ +#define SHA1_TRUNC_DIGEST_SIZE 12 +#define SHA256_TRUNC_DIGEST_SIZE 16 +#define SHA384_TRUNC_DIGEST_SIZE 24 +#define SHA512_TRUNC_DIGEST_SIZE 32 + +static DEFINE_MUTEX(mutex); +static int is_crypto_registered; + +struct cpt_device_desc { + enum otx_cptpf_type pf_type; + struct pci_dev *dev; + int num_queues; +}; + +struct cpt_device_table { + atomic_t count; + struct cpt_device_desc desc[CPT_MAX_VF_NUM]; +}; + +static struct cpt_device_table se_devices = { + .count = ATOMIC_INIT(0) +}; + +static struct cpt_device_table ae_devices = { + .count = ATOMIC_INIT(0) +}; + +static inline int get_se_device(struct pci_dev **pdev, int *cpu_num) +{ + int count, ret = 0; + + count = atomic_read(&se_devices.count); + if (count < 1) + return -ENODEV; + + *cpu_num = get_cpu(); + + if (se_devices.desc[0].pf_type == OTX_CPT_SE) { + /* + * On OcteonTX platform there is one CPT instruction queue bound + * to each VF. We get maximum performance if one CPT queue + * is available for each cpu otherwise CPT queues need to be + * shared between cpus. + */ + if (*cpu_num >= count) + *cpu_num %= count; + *pdev = se_devices.desc[*cpu_num].dev; + } else { + pr_err("Unknown PF type %d\n", se_devices.desc[0].pf_type); + ret = -EINVAL; + } + put_cpu(); + + return ret; +} + +static inline int validate_hmac_cipher_null(struct otx_cpt_req_info *cpt_req) +{ + struct otx_cpt_req_ctx *rctx; + struct aead_request *req; + struct crypto_aead *tfm; + + req = container_of(cpt_req->areq, struct aead_request, base); + tfm = crypto_aead_reqtfm(req); + rctx = aead_request_ctx(req); + if (memcmp(rctx->fctx.hmac.s.hmac_calc, + rctx->fctx.hmac.s.hmac_recv, + crypto_aead_authsize(tfm)) != 0) + return -EBADMSG; + + return 0; +} + +static void otx_cpt_aead_callback(int status, void *arg1, void *arg2) +{ + struct otx_cpt_info_buffer *cpt_info = arg2; + struct crypto_async_request *areq = arg1; + struct otx_cpt_req_info *cpt_req; + struct pci_dev *pdev; + + cpt_req = cpt_info->req; + if (!status) { + /* + * When selected cipher is NULL we need to manually + * verify whether calculated hmac value matches + * received hmac value + */ + if (cpt_req->req_type == OTX_CPT_AEAD_ENC_DEC_NULL_REQ && + !cpt_req->is_enc) + status = validate_hmac_cipher_null(cpt_req); + } + if (cpt_info) { + pdev = cpt_info->pdev; + do_request_cleanup(pdev, cpt_info); + } + if (areq) + areq->complete(areq, status); +} + +static void output_iv_copyback(struct crypto_async_request *areq) +{ + struct otx_cpt_req_info *req_info; + struct skcipher_request *sreq; + struct crypto_skcipher *stfm; + struct otx_cpt_req_ctx *rctx; + struct otx_cpt_enc_ctx *ctx; + u32 start, ivsize; + + sreq = container_of(areq, struct skcipher_request, base); + stfm = crypto_skcipher_reqtfm(sreq); + ctx = crypto_skcipher_ctx(stfm); + if (ctx->cipher_type == OTX_CPT_AES_CBC || + ctx->cipher_type == OTX_CPT_DES3_CBC) { + 
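+		/*
+		 * Note (added for clarity): the skcipher API expects req->iv
+		 * to hold the last ciphertext block when the request
+		 * completes, so that chained requests can continue the
+		 * stream. For encryption that block is read back from the
+		 * tail of dst; for decryption it comes from the tail of src,
+		 * or from the iv_out copy saved before an in-place operation
+		 * overwrote it.
+		 */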
rctx = skcipher_request_ctx(sreq); + req_info = &rctx->cpt_req; + ivsize = crypto_skcipher_ivsize(stfm); + start = sreq->cryptlen - ivsize; + + if (req_info->is_enc) { + scatterwalk_map_and_copy(sreq->iv, sreq->dst, start, + ivsize, 0); + } else { + if (sreq->src != sreq->dst) { + scatterwalk_map_and_copy(sreq->iv, sreq->src, + start, ivsize, 0); + } else { + memcpy(sreq->iv, req_info->iv_out, ivsize); + kfree(req_info->iv_out); + } + } + } +} + +static void otx_cpt_skcipher_callback(int status, void *arg1, void *arg2) +{ + struct otx_cpt_info_buffer *cpt_info = arg2; + struct crypto_async_request *areq = arg1; + struct pci_dev *pdev; + + if (areq) { + if (!status) + output_iv_copyback(areq); + if (cpt_info) { + pdev = cpt_info->pdev; + do_request_cleanup(pdev, cpt_info); + } + areq->complete(areq, status); + } +} + +static inline void update_input_data(struct otx_cpt_req_info *req_info, + struct scatterlist *inp_sg, + u32 nbytes, u32 *argcnt) +{ + req_info->req.dlen += nbytes; + + while (nbytes) { + u32 len = min(nbytes, inp_sg->length); + u8 *ptr = sg_virt(inp_sg); + + req_info->in[*argcnt].vptr = (void *)ptr; + req_info->in[*argcnt].size = len; + nbytes -= len; + ++(*argcnt); + inp_sg = sg_next(inp_sg); + } +} + +static inline void update_output_data(struct otx_cpt_req_info *req_info, + struct scatterlist *outp_sg, + u32 offset, u32 nbytes, u32 *argcnt) +{ + req_info->rlen += nbytes; + + while (nbytes) { + u32 len = min(nbytes, outp_sg->length - offset); + u8 *ptr = sg_virt(outp_sg); + + req_info->out[*argcnt].vptr = (void *) (ptr + offset); + req_info->out[*argcnt].size = len; + nbytes -= len; + ++(*argcnt); + offset = 0; + outp_sg = sg_next(outp_sg); + } +} + +static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc, + u32 *argcnt) +{ + struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + struct crypto_tfm *tfm = crypto_skcipher_tfm(stfm); + struct otx_cpt_enc_ctx *ctx = crypto_tfm_ctx(tfm); + struct otx_cpt_fc_ctx *fctx = &rctx->fctx; + int ivsize = crypto_skcipher_ivsize(stfm); + u32 start = req->cryptlen - ivsize; + u64 *ctrl_flags = NULL; + gfp_t flags; + + flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
+ GFP_KERNEL : GFP_ATOMIC; + req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER; + req_info->ctrl.s.se_req = OTX_CPT_SE_CORE_REQ; + + req_info->req.opcode.s.major = OTX_CPT_MAJOR_OP_FC | + DMA_MODE_FLAG(OTX_CPT_DMA_GATHER_SCATTER); + if (enc) { + req_info->req.opcode.s.minor = 2; + } else { + req_info->req.opcode.s.minor = 3; + if ((ctx->cipher_type == OTX_CPT_AES_CBC || + ctx->cipher_type == OTX_CPT_DES3_CBC) && + req->src == req->dst) { + req_info->iv_out = kmalloc(ivsize, flags); + if (!req_info->iv_out) + return -ENOMEM; + + scatterwalk_map_and_copy(req_info->iv_out, req->src, + start, ivsize, 0); + } + } + /* Encryption data length */ + req_info->req.param1 = req->cryptlen; + /* Authentication data length */ + req_info->req.param2 = 0; + + fctx->enc.enc_ctrl.e.enc_cipher = ctx->cipher_type; + fctx->enc.enc_ctrl.e.aes_key = ctx->key_type; + fctx->enc.enc_ctrl.e.iv_source = OTX_CPT_FROM_CPTR; + + if (ctx->cipher_type == OTX_CPT_AES_XTS) + memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len * 2); + else + memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len); + + memcpy(fctx->enc.encr_iv, req->iv, crypto_skcipher_ivsize(stfm)); + + ctrl_flags = (u64 *)&fctx->enc.enc_ctrl.flags; + *ctrl_flags = cpu_to_be64(*ctrl_flags); + + /* + * Storing Packet Data Information in offset + * Control Word First 8 bytes + */ + req_info->in[*argcnt].vptr = (u8 *)&rctx->ctrl_word; + req_info->in[*argcnt].size = CONTROL_WORD_LEN; + req_info->req.dlen += CONTROL_WORD_LEN; + ++(*argcnt); + + req_info->in[*argcnt].vptr = (u8 *)fctx; + req_info->in[*argcnt].size = sizeof(struct otx_cpt_fc_ctx); + req_info->req.dlen += sizeof(struct otx_cpt_fc_ctx); + + ++(*argcnt); + + return 0; +} + +static inline u32 create_input_list(struct skcipher_request *req, u32 enc, + u32 enc_iv_len) +{ + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0; + int ret; + + ret = create_ctx_hdr(req, enc, &argcnt); + if (ret) + return ret; + + update_input_data(req_info, req->src, req->cryptlen, &argcnt); + req_info->incnt = argcnt; + + return 0; +} + +static inline void create_output_list(struct skcipher_request *req, + u32 enc_iv_len) +{ + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0; + + /* + * OUTPUT Buffer Processing + * AES encryption/decryption output would be + * received in the following format + * + * ------IV--------|------ENCRYPTED/DECRYPTED DATA-----| + * [ 16 Bytes/ [ Request Enc/Dec/ DATA Len AES CBC ] + */ + update_output_data(req_info, req->dst, 0, req->cryptlen, &argcnt); + req_info->outcnt = argcnt; +} + +static inline int cpt_enc_dec(struct skcipher_request *req, u32 enc) +{ + struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 enc_iv_len = crypto_skcipher_ivsize(stfm); + struct pci_dev *pdev; + int status, cpu_num; + + /* Validate that request doesn't exceed maximum CPT supported size */ + if (req->cryptlen > OTX_CPT_MAX_REQ_SIZE) + return -E2BIG; + + /* Clear control words */ + rctx->ctrl_word.flags = 0; + rctx->fctx.enc.enc_ctrl.flags = 0; + + status = create_input_list(req, enc, enc_iv_len); + if (status) + return status; + create_output_list(req, enc_iv_len); + + status = get_se_device(&pdev, &cpu_num); + if (status) + return status; + + req_info->callback = (void *)otx_cpt_skcipher_callback; + req_info->areq = &req->base; + 
req_info->req_type = OTX_CPT_ENC_DEC_REQ; + req_info->is_enc = enc; + req_info->is_trunc_hmac = false; + req_info->ctrl.s.grp = 0; + + /* + * We perform an asynchronous send and once + * the request is completed the driver would + * intimate through registered call back functions + */ + status = otx_cpt_do_request(pdev, req_info, cpu_num); + + return status; +} + +static int otx_cpt_skcipher_encrypt(struct skcipher_request *req) +{ + return cpt_enc_dec(req, true); +} + +static int otx_cpt_skcipher_decrypt(struct skcipher_request *req) +{ + return cpt_enc_dec(req, false); +} + +static int otx_cpt_skcipher_xts_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + const u8 *key2 = key + (keylen / 2); + const u8 *key1 = key; + int ret; + + ret = xts_check_key(crypto_skcipher_tfm(tfm), key, keylen); + if (ret) + return ret; + ctx->key_len = keylen; + memcpy(ctx->enc_key, key1, keylen / 2); + memcpy(ctx->enc_key + KEY2_OFFSET, key2, keylen / 2); + ctx->cipher_type = OTX_CPT_AES_XTS; + switch (ctx->key_len) { + case 2 * AES_KEYSIZE_128: + ctx->key_type = OTX_CPT_AES_128_BIT; + break; + case 2 * AES_KEYSIZE_256: + ctx->key_type = OTX_CPT_AES_256_BIT; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int cpt_des_setkey(struct crypto_skcipher *tfm, const u8 *key, + u32 keylen, u8 cipher_type) +{ + struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (keylen != DES3_EDE_KEY_SIZE) + return -EINVAL; + + ctx->key_len = keylen; + ctx->cipher_type = cipher_type; + + memcpy(ctx->enc_key, key, keylen); + + return 0; +} + +static int cpt_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, + u32 keylen, u8 cipher_type) +{ + struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + switch (keylen) { + case AES_KEYSIZE_128: + ctx->key_type = OTX_CPT_AES_128_BIT; + break; + case AES_KEYSIZE_192: + ctx->key_type = OTX_CPT_AES_192_BIT; + break; + case AES_KEYSIZE_256: + ctx->key_type = OTX_CPT_AES_256_BIT; + break; + default: + return -EINVAL; + } + ctx->key_len = keylen; + ctx->cipher_type = cipher_type; + + memcpy(ctx->enc_key, key, keylen); + + return 0; +} + +static int otx_cpt_skcipher_cbc_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_aes_setkey(tfm, key, keylen, OTX_CPT_AES_CBC); +} + +static int otx_cpt_skcipher_ecb_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_aes_setkey(tfm, key, keylen, OTX_CPT_AES_ECB); +} + +static int otx_cpt_skcipher_cfb_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_aes_setkey(tfm, key, keylen, OTX_CPT_AES_CFB); +} + +static int otx_cpt_skcipher_cbc_des3_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_des_setkey(tfm, key, keylen, OTX_CPT_DES3_CBC); +} + +static int otx_cpt_skcipher_ecb_des3_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_des_setkey(tfm, key, keylen, OTX_CPT_DES3_ECB); +} + +static int otx_cpt_enc_dec_init(struct crypto_skcipher *tfm) +{ + struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + memset(ctx, 0, sizeof(*ctx)); + /* + * Additional memory for skcipher_request is + * allocated since the cryptd daemon uses + * this memory for request_ctx information + */ + crypto_skcipher_set_reqsize(tfm, sizeof(struct otx_cpt_req_ctx) + + sizeof(struct skcipher_request)); + + return 0; +} + +static int cpt_aead_init(struct crypto_aead *tfm, u8 cipher_type, u8 mac_type) +{ + struct 
otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + + ctx->cipher_type = cipher_type; + ctx->mac_type = mac_type; + + /* + * When selected cipher is NULL we use HMAC opcode instead of + * FLEXICRYPTO opcode therefore we don't need to use HASH algorithms + * for calculating ipad and opad + */ + if (ctx->cipher_type != OTX_CPT_CIPHER_NULL) { + switch (ctx->mac_type) { + case OTX_CPT_SHA1: + ctx->hashalg = crypto_alloc_shash("sha1", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX_CPT_SHA256: + ctx->hashalg = crypto_alloc_shash("sha256", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX_CPT_SHA384: + ctx->hashalg = crypto_alloc_shash("sha384", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX_CPT_SHA512: + ctx->hashalg = crypto_alloc_shash("sha512", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + } + } + + crypto_aead_set_reqsize(tfm, sizeof(struct otx_cpt_req_ctx)); + + return 0; +} + +static int otx_cpt_aead_cbc_aes_sha1_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA1); +} + +static int otx_cpt_aead_cbc_aes_sha256_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA256); +} + +static int otx_cpt_aead_cbc_aes_sha384_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA384); +} + +static int otx_cpt_aead_cbc_aes_sha512_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA512); +} + +static int otx_cpt_aead_ecb_null_sha1_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA1); +} + +static int otx_cpt_aead_ecb_null_sha256_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA256); +} + +static int otx_cpt_aead_ecb_null_sha384_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA384); +} + +static int otx_cpt_aead_ecb_null_sha512_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA512); +} + +static int otx_cpt_aead_gcm_aes_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_GCM, OTX_CPT_MAC_NULL); +} + +static void otx_cpt_aead_exit(struct crypto_aead *tfm) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + + kfree(ctx->ipad); + kfree(ctx->opad); + if (ctx->hashalg) + crypto_free_shash(ctx->hashalg); + kfree(ctx->sdesc); +} + +/* + * This is the Integrity Check Value validation (aka the authentication tag + * length) + */ +static int otx_cpt_aead_set_authsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + + switch (ctx->mac_type) { + case OTX_CPT_SHA1: + if (authsize != SHA1_DIGEST_SIZE && + authsize != SHA1_TRUNC_DIGEST_SIZE) + return -EINVAL; + + if (authsize == SHA1_TRUNC_DIGEST_SIZE) + ctx->is_trunc_hmac = true; + break; + + case OTX_CPT_SHA256: + if (authsize != SHA256_DIGEST_SIZE && + authsize != SHA256_TRUNC_DIGEST_SIZE) + return -EINVAL; + + if (authsize == SHA256_TRUNC_DIGEST_SIZE) + ctx->is_trunc_hmac = true; + break; + + case OTX_CPT_SHA384: + if (authsize != SHA384_DIGEST_SIZE && + authsize != SHA384_TRUNC_DIGEST_SIZE) + return -EINVAL; + + if (authsize == SHA384_TRUNC_DIGEST_SIZE) + ctx->is_trunc_hmac = true; + break; + + case OTX_CPT_SHA512: + if (authsize != 
SHA512_DIGEST_SIZE && + authsize != SHA512_TRUNC_DIGEST_SIZE) + return -EINVAL; + + if (authsize == SHA512_TRUNC_DIGEST_SIZE) + ctx->is_trunc_hmac = true; + break; + + case OTX_CPT_MAC_NULL: + if (ctx->cipher_type == OTX_CPT_AES_GCM) { + if (authsize != AES_GCM_ICV_SIZE) + return -EINVAL; + } else + return -EINVAL; + break; + + default: + return -EINVAL; + } + + tfm->authsize = authsize; + return 0; +} + +static struct otx_cpt_sdesc *alloc_sdesc(struct crypto_shash *alg) +{ + struct otx_cpt_sdesc *sdesc; + int size; + + size = sizeof(struct shash_desc) + crypto_shash_descsize(alg); + sdesc = kmalloc(size, GFP_KERNEL); + if (!sdesc) + return NULL; + + sdesc->shash.tfm = alg; + + return sdesc; +} + +static inline void swap_data32(void *buf, u32 len) +{ + u32 *store = (u32 *) buf; + int i = 0; + + for (i = 0 ; i < len/sizeof(u32); i++, store++) + *store = cpu_to_be32(*store); +} + +static inline void swap_data64(void *buf, u32 len) +{ + u64 *store = (u64 *) buf; + int i = 0; + + for (i = 0 ; i < len/sizeof(u64); i++, store++) + *store = cpu_to_be64(*store); +} + +static int copy_pad(u8 mac_type, u8 *out_pad, u8 *in_pad) +{ + struct sha512_state *sha512; + struct sha256_state *sha256; + struct sha1_state *sha1; + + switch (mac_type) { + case OTX_CPT_SHA1: + sha1 = (struct sha1_state *) in_pad; + swap_data32(sha1->state, SHA1_DIGEST_SIZE); + memcpy(out_pad, &sha1->state, SHA1_DIGEST_SIZE); + break; + + case OTX_CPT_SHA256: + sha256 = (struct sha256_state *) in_pad; + swap_data32(sha256->state, SHA256_DIGEST_SIZE); + memcpy(out_pad, &sha256->state, SHA256_DIGEST_SIZE); + break; + + case OTX_CPT_SHA384: + case OTX_CPT_SHA512: + sha512 = (struct sha512_state *) in_pad; + swap_data64(sha512->state, SHA512_DIGEST_SIZE); + memcpy(out_pad, &sha512->state, SHA512_DIGEST_SIZE); + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int aead_hmac_init(struct crypto_aead *cipher) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + int state_size = crypto_shash_statesize(ctx->hashalg); + int ds = crypto_shash_digestsize(ctx->hashalg); + int bs = crypto_shash_blocksize(ctx->hashalg); + int authkeylen = ctx->auth_key_len; + u8 *ipad = NULL, *opad = NULL; + int ret = 0, icount = 0; + + ctx->sdesc = alloc_sdesc(ctx->hashalg); + if (!ctx->sdesc) + return -ENOMEM; + + ctx->ipad = kzalloc(bs, GFP_KERNEL); + if (!ctx->ipad) { + ret = -ENOMEM; + goto calc_fail; + } + + ctx->opad = kzalloc(bs, GFP_KERNEL); + if (!ctx->opad) { + ret = -ENOMEM; + goto calc_fail; + } + + ipad = kzalloc(state_size, GFP_KERNEL); + if (!ipad) { + ret = -ENOMEM; + goto calc_fail; + } + + opad = kzalloc(state_size, GFP_KERNEL); + if (!opad) { + ret = -ENOMEM; + goto calc_fail; + } + + if (authkeylen > bs) { + ret = crypto_shash_digest(&ctx->sdesc->shash, ctx->key, + authkeylen, ipad); + if (ret) + goto calc_fail; + + authkeylen = ds; + } else { + memcpy(ipad, ctx->key, authkeylen); + } + + memset(ipad + authkeylen, 0, bs - authkeylen); + memcpy(opad, ipad, bs); + + for (icount = 0; icount < bs; icount++) { + ipad[icount] ^= 0x36; + opad[icount] ^= 0x5c; + } + + /* + * Partial Hash calculated from the software + * algorithm is retrieved for IPAD & OPAD + */ + + /* IPAD Calculation */ + crypto_shash_init(&ctx->sdesc->shash); + crypto_shash_update(&ctx->sdesc->shash, ipad, bs); + crypto_shash_export(&ctx->sdesc->shash, ipad); + ret = copy_pad(ctx->mac_type, ctx->ipad, ipad); + if (ret) + goto calc_fail; + + /* OPAD Calculation */ + crypto_shash_init(&ctx->sdesc->shash); + crypto_shash_update(&ctx->sdesc->shash, opad, 
bs); + crypto_shash_export(&ctx->sdesc->shash, opad); + ret = copy_pad(ctx->mac_type, ctx->opad, opad); + if (ret) + goto calc_fail; + + kfree(ipad); + kfree(opad); + + return 0; + +calc_fail: + kfree(ctx->ipad); + ctx->ipad = NULL; + kfree(ctx->opad); + ctx->opad = NULL; + kfree(ipad); + kfree(opad); + kfree(ctx->sdesc); + ctx->sdesc = NULL; + + return ret; +} + +static int otx_cpt_aead_cbc_aes_sha_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct crypto_authenc_key_param *param; + int enckeylen = 0, authkeylen = 0; + struct rtattr *rta = (void *)key; + int status = -EINVAL; + + if (!RTA_OK(rta, keylen)) + goto badkey; + + if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) + goto badkey; + + if (RTA_PAYLOAD(rta) < sizeof(*param)) + goto badkey; + + param = RTA_DATA(rta); + enckeylen = be32_to_cpu(param->enckeylen); + key += RTA_ALIGN(rta->rta_len); + keylen -= RTA_ALIGN(rta->rta_len); + if (keylen < enckeylen) + goto badkey; + + if (keylen > OTX_CPT_MAX_KEY_SIZE) + goto badkey; + + authkeylen = keylen - enckeylen; + memcpy(ctx->key, key, keylen); + + switch (enckeylen) { + case AES_KEYSIZE_128: + ctx->key_type = OTX_CPT_AES_128_BIT; + break; + case AES_KEYSIZE_192: + ctx->key_type = OTX_CPT_AES_192_BIT; + break; + case AES_KEYSIZE_256: + ctx->key_type = OTX_CPT_AES_256_BIT; + break; + default: + /* Invalid key length */ + goto badkey; + } + + ctx->enc_key_len = enckeylen; + ctx->auth_key_len = authkeylen; + + status = aead_hmac_init(cipher); + if (status) + goto badkey; + + return 0; +badkey: + return status; +} + +static int otx_cpt_aead_ecb_null_sha_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct crypto_authenc_key_param *param; + struct rtattr *rta = (void *)key; + int enckeylen = 0; + + if (!RTA_OK(rta, keylen)) + goto badkey; + + if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) + goto badkey; + + if (RTA_PAYLOAD(rta) < sizeof(*param)) + goto badkey; + + param = RTA_DATA(rta); + enckeylen = be32_to_cpu(param->enckeylen); + key += RTA_ALIGN(rta->rta_len); + keylen -= RTA_ALIGN(rta->rta_len); + if (enckeylen != 0) + goto badkey; + + if (keylen > OTX_CPT_MAX_KEY_SIZE) + goto badkey; + + memcpy(ctx->key, key, keylen); + ctx->enc_key_len = enckeylen; + ctx->auth_key_len = keylen; + return 0; +badkey: + return -EINVAL; +} + +static int otx_cpt_aead_gcm_aes_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + + /* + * For aes gcm we expect to get encryption key (16, 24, 32 bytes) + * and salt (4 bytes) + */ + switch (keylen) { + case AES_KEYSIZE_128 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX_CPT_AES_128_BIT; + ctx->enc_key_len = AES_KEYSIZE_128; + break; + case AES_KEYSIZE_192 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX_CPT_AES_192_BIT; + ctx->enc_key_len = AES_KEYSIZE_192; + break; + case AES_KEYSIZE_256 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX_CPT_AES_256_BIT; + ctx->enc_key_len = AES_KEYSIZE_256; + break; + default: + /* Invalid key and salt length */ + return -EINVAL; + } + + /* Store encryption key and salt */ + memcpy(ctx->key, key, keylen); + + return 0; +} + +static inline u32 create_aead_ctx_hdr(struct aead_request *req, u32 enc, + u32 *argcnt) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct otx_cpt_aead_ctx 
*ctx = crypto_aead_ctx(tfm); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + struct otx_cpt_fc_ctx *fctx = &rctx->fctx; + int mac_len = crypto_aead_authsize(tfm); + int ds; + + rctx->ctrl_word.e.enc_data_offset = req->assoclen; + + switch (ctx->cipher_type) { + case OTX_CPT_AES_CBC: + fctx->enc.enc_ctrl.e.iv_source = OTX_CPT_FROM_CPTR; + /* Copy encryption key to context */ + memcpy(fctx->enc.encr_key, ctx->key + ctx->auth_key_len, + ctx->enc_key_len); + /* Copy IV to context */ + memcpy(fctx->enc.encr_iv, req->iv, crypto_aead_ivsize(tfm)); + + ds = crypto_shash_digestsize(ctx->hashalg); + if (ctx->mac_type == OTX_CPT_SHA384) + ds = SHA512_DIGEST_SIZE; + if (ctx->ipad) + memcpy(fctx->hmac.e.ipad, ctx->ipad, ds); + if (ctx->opad) + memcpy(fctx->hmac.e.opad, ctx->opad, ds); + break; + + case OTX_CPT_AES_GCM: + fctx->enc.enc_ctrl.e.iv_source = OTX_CPT_FROM_DPTR; + /* Copy encryption key to context */ + memcpy(fctx->enc.encr_key, ctx->key, ctx->enc_key_len); + /* Copy salt to context */ + memcpy(fctx->enc.encr_iv, ctx->key + ctx->enc_key_len, + AES_GCM_SALT_SIZE); + + rctx->ctrl_word.e.iv_offset = req->assoclen - AES_GCM_IV_OFFSET; + break; + + default: + /* Unknown cipher type */ + return -EINVAL; + } + rctx->ctrl_word.flags = cpu_to_be64(rctx->ctrl_word.flags); + + req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER; + req_info->ctrl.s.se_req = OTX_CPT_SE_CORE_REQ; + req_info->req.opcode.s.major = OTX_CPT_MAJOR_OP_FC | + DMA_MODE_FLAG(OTX_CPT_DMA_GATHER_SCATTER); + if (enc) { + req_info->req.opcode.s.minor = 2; + req_info->req.param1 = req->cryptlen; + req_info->req.param2 = req->cryptlen + req->assoclen; + } else { + req_info->req.opcode.s.minor = 3; + req_info->req.param1 = req->cryptlen - mac_len; + req_info->req.param2 = req->cryptlen + req->assoclen - mac_len; + } + + fctx->enc.enc_ctrl.e.enc_cipher = ctx->cipher_type; + fctx->enc.enc_ctrl.e.aes_key = ctx->key_type; + fctx->enc.enc_ctrl.e.mac_type = ctx->mac_type; + fctx->enc.enc_ctrl.e.mac_len = mac_len; + fctx->enc.enc_ctrl.flags = cpu_to_be64(fctx->enc.enc_ctrl.flags); + + /* + * Storing Packet Data Information in offset + * Control Word First 8 bytes + */ + req_info->in[*argcnt].vptr = (u8 *)&rctx->ctrl_word; + req_info->in[*argcnt].size = CONTROL_WORD_LEN; + req_info->req.dlen += CONTROL_WORD_LEN; + ++(*argcnt); + + req_info->in[*argcnt].vptr = (u8 *)fctx; + req_info->in[*argcnt].size = sizeof(struct otx_cpt_fc_ctx); + req_info->req.dlen += sizeof(struct otx_cpt_fc_ctx); + ++(*argcnt); + + return 0; +} + +static inline u32 create_hmac_ctx_hdr(struct aead_request *req, u32 *argcnt, + u32 enc) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + + req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER; + req_info->ctrl.s.se_req = OTX_CPT_SE_CORE_REQ; + req_info->req.opcode.s.major = OTX_CPT_MAJOR_OP_HMAC | + DMA_MODE_FLAG(OTX_CPT_DMA_GATHER_SCATTER); + req_info->is_trunc_hmac = ctx->is_trunc_hmac; + + req_info->req.opcode.s.minor = 0; + req_info->req.param1 = ctx->auth_key_len; + req_info->req.param2 = ctx->mac_type << 8; + + /* Add authentication key */ + req_info->in[*argcnt].vptr = ctx->key; + req_info->in[*argcnt].size = round_up(ctx->auth_key_len, 8); + req_info->req.dlen += round_up(ctx->auth_key_len, 8); + ++(*argcnt); + + return 0; +} + +static inline u32 create_aead_input_list(struct aead_request *req, u32 enc) +{ + struct otx_cpt_req_ctx *rctx = 
aead_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 inputlen = req->cryptlen + req->assoclen; + u32 status, argcnt = 0; + + status = create_aead_ctx_hdr(req, enc, &argcnt); + if (status) + return status; + update_input_data(req_info, req->src, inputlen, &argcnt); + req_info->incnt = argcnt; + + return 0; +} + +static inline u32 create_aead_output_list(struct aead_request *req, u32 enc, + u32 mac_len) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0, outputlen = 0; + + if (enc) + outputlen = req->cryptlen + req->assoclen + mac_len; + else + outputlen = req->cryptlen + req->assoclen - mac_len; + + update_output_data(req_info, req->dst, 0, outputlen, &argcnt); + req_info->outcnt = argcnt; + + return 0; +} + +static inline u32 create_aead_null_input_list(struct aead_request *req, + u32 enc, u32 mac_len) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 inputlen, argcnt = 0; + + if (enc) + inputlen = req->cryptlen + req->assoclen; + else + inputlen = req->cryptlen + req->assoclen - mac_len; + + create_hmac_ctx_hdr(req, &argcnt, enc); + update_input_data(req_info, req->src, inputlen, &argcnt); + req_info->incnt = argcnt; + + return 0; +} + +static inline u32 create_aead_null_output_list(struct aead_request *req, + u32 enc, u32 mac_len) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + struct scatterlist *dst; + u8 *ptr = NULL; + int argcnt = 0, status, offset; + u32 inputlen; + + if (enc) + inputlen = req->cryptlen + req->assoclen; + else + inputlen = req->cryptlen + req->assoclen - mac_len; + + /* + * If source and destination are different + * then copy payload to destination + */ + if (req->src != req->dst) { + + ptr = kmalloc(inputlen, (req_info->areq->flags & + CRYPTO_TFM_REQ_MAY_SLEEP) ? 
+ GFP_KERNEL : GFP_ATOMIC); + if (!ptr) { + status = -ENOMEM; + goto error; + } + + status = sg_copy_to_buffer(req->src, sg_nents(req->src), ptr, + inputlen); + if (status != inputlen) { + status = -EINVAL; + goto error; + } + status = sg_copy_from_buffer(req->dst, sg_nents(req->dst), ptr, + inputlen); + if (status != inputlen) { + status = -EINVAL; + goto error; + } + kfree(ptr); + } + + if (enc) { + /* + * In an encryption scenario hmac needs + * to be appended after payload + */ + dst = req->dst; + offset = inputlen; + while (offset >= dst->length) { + offset -= dst->length; + dst = sg_next(dst); + if (!dst) { + status = -ENOENT; + goto error; + } + } + + update_output_data(req_info, dst, offset, mac_len, &argcnt); + } else { + /* + * In a decryption scenario calculated hmac for received + * payload needs to be compare with hmac received + */ + status = sg_copy_buffer(req->src, sg_nents(req->src), + rctx->fctx.hmac.s.hmac_recv, mac_len, + inputlen, true); + if (status != mac_len) { + status = -EINVAL; + goto error; + } + + req_info->out[argcnt].vptr = rctx->fctx.hmac.s.hmac_calc; + req_info->out[argcnt].size = mac_len; + argcnt++; + } + + req_info->outcnt = argcnt; + return 0; +error: + kfree(ptr); + return status; +} + +static u32 cpt_aead_enc_dec(struct aead_request *req, u8 reg_type, u8 enc) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct pci_dev *pdev; + u32 status, cpu_num; + + /* Clear control words */ + rctx->ctrl_word.flags = 0; + rctx->fctx.enc.enc_ctrl.flags = 0; + + req_info->callback = otx_cpt_aead_callback; + req_info->areq = &req->base; + req_info->req_type = reg_type; + req_info->is_enc = enc; + req_info->is_trunc_hmac = false; + + switch (reg_type) { + case OTX_CPT_AEAD_ENC_DEC_REQ: + status = create_aead_input_list(req, enc); + if (status) + return status; + status = create_aead_output_list(req, enc, + crypto_aead_authsize(tfm)); + if (status) + return status; + break; + + case OTX_CPT_AEAD_ENC_DEC_NULL_REQ: + status = create_aead_null_input_list(req, enc, + crypto_aead_authsize(tfm)); + if (status) + return status; + status = create_aead_null_output_list(req, enc, + crypto_aead_authsize(tfm)); + if (status) + return status; + break; + + default: + return -EINVAL; + } + + /* Validate that request doesn't exceed maximum CPT supported size */ + if (req_info->req.param1 > OTX_CPT_MAX_REQ_SIZE || + req_info->req.param2 > OTX_CPT_MAX_REQ_SIZE) + return -E2BIG; + + status = get_se_device(&pdev, &cpu_num); + if (status) + return status; + + req_info->ctrl.s.grp = 0; + + status = otx_cpt_do_request(pdev, req_info, cpu_num); + /* + * We perform an asynchronous send and once + * the request is completed the driver would + * intimate through registered call back functions + */ + return status; +} + +static int otx_cpt_aead_encrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_REQ, true); +} + +static int otx_cpt_aead_decrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_REQ, false); +} + +static int otx_cpt_aead_null_encrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_NULL_REQ, true); +} + +static int otx_cpt_aead_null_decrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_NULL_REQ, false); +} + +static struct skcipher_alg otx_cpt_skciphers[] = { { + .base.cra_name = "xts(aes)", + .base.cra_driver_name = 
"cpt_xts_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .setkey = otx_cpt_skcipher_xts_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "cbc(aes)", + .base.cra_driver_name = "cpt_cbc_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = otx_cpt_skcipher_cbc_aes_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "ecb(aes)", + .base.cra_driver_name = "cpt_ecb_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .ivsize = 0, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = otx_cpt_skcipher_ecb_aes_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "cfb(aes)", + .base.cra_driver_name = "cpt_cfb_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = otx_cpt_skcipher_cfb_aes_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "cbc(des3_ede)", + .base.cra_driver_name = "cpt_cbc_des3_ede", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_des3_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = otx_cpt_skcipher_cbc_des3_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "ecb(des3_ede)", + .base.cra_driver_name = "cpt_ecb_des3_ede", + .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_des3_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = 0, + .setkey = otx_cpt_skcipher_ecb_des3_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +} }; + +static struct aead_alg otx_cpt_aeads[] = { { + .base = { + .cra_name = "authenc(hmac(sha1),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha1_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct 
otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_cbc_aes_sha1_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha256),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha256_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_cbc_aes_sha256_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha384),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha384_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_cbc_aes_sha384_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA384_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha512),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha512_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_cbc_aes_sha512_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA512_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha1),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha1_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_ecb_null_sha1_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_null_encrypt, + .decrypt = otx_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA1_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha256),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha256_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_ecb_null_sha256_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_null_encrypt, + .decrypt = otx_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA256_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha384),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha384_ecb_null", + 
.cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_ecb_null_sha384_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_null_encrypt, + .decrypt = otx_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA384_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha512),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha512_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_ecb_null_sha512_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_null_encrypt, + .decrypt = otx_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA512_DIGEST_SIZE, +}, { + .base = { + .cra_name = "rfc4106(gcm(aes))", + .cra_driver_name = "cpt_rfc4106_gcm_aes", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_gcm_aes_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_gcm_aes_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_GCM_IV_SIZE, + .maxauthsize = AES_GCM_ICV_SIZE, +} }; + +static inline int is_any_alg_used(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(otx_cpt_skciphers); i++) + if (refcount_read(&otx_cpt_skciphers[i].base.cra_refcnt) != 1) + return true; + for (i = 0; i < ARRAY_SIZE(otx_cpt_aeads); i++) + if (refcount_read(&otx_cpt_aeads[i].base.cra_refcnt) != 1) + return true; + return false; +} + +static inline int cpt_register_algs(void) +{ + int i, err = 0; + + if (!IS_ENABLED(CONFIG_DM_CRYPT)) { + for (i = 0; i < ARRAY_SIZE(otx_cpt_skciphers); i++) + otx_cpt_skciphers[i].base.cra_flags &= ~CRYPTO_ALG_DEAD; + + err = crypto_register_skciphers(otx_cpt_skciphers, + ARRAY_SIZE(otx_cpt_skciphers)); + if (err) + return err; + } + + for (i = 0; i < ARRAY_SIZE(otx_cpt_aeads); i++) + otx_cpt_aeads[i].base.cra_flags &= ~CRYPTO_ALG_DEAD; + + err = crypto_register_aeads(otx_cpt_aeads, ARRAY_SIZE(otx_cpt_aeads)); + if (err) { + crypto_unregister_skciphers(otx_cpt_skciphers, + ARRAY_SIZE(otx_cpt_skciphers)); + return err; + } + + return 0; +} + +static inline void cpt_unregister_algs(void) +{ + crypto_unregister_skciphers(otx_cpt_skciphers, + ARRAY_SIZE(otx_cpt_skciphers)); + crypto_unregister_aeads(otx_cpt_aeads, ARRAY_SIZE(otx_cpt_aeads)); +} + +static int compare_func(const void *lptr, const void *rptr) +{ + struct cpt_device_desc *ldesc = (struct cpt_device_desc *) lptr; + struct cpt_device_desc *rdesc = (struct cpt_device_desc *) rptr; + + if (ldesc->dev->devfn < rdesc->dev->devfn) + return -1; + if (ldesc->dev->devfn > rdesc->dev->devfn) + return 1; + return 0; +} + +static void swap_func(void *lptr, void *rptr, int size) +{ + struct cpt_device_desc *ldesc = (struct cpt_device_desc *) lptr; + struct cpt_device_desc *rdesc = (struct cpt_device_desc *) rptr; + struct cpt_device_desc desc; + + desc = *ldesc; + *ldesc = *rdesc; + *rdesc = desc; +} + +int otx_cpt_crypto_init(struct pci_dev *pdev, struct module *mod, + enum 
otx_cptpf_type pf_type,
+			enum otx_cptvf_type engine_type,
+			int num_queues, int num_devices)
+{
+	int ret = 0;
+	int count;
+
+	mutex_lock(&mutex);
+	switch (engine_type) {
+	case OTX_CPT_SE_TYPES:
+		count = atomic_read(&se_devices.count);
+		if (count >= CPT_MAX_VF_NUM) {
+			dev_err(&pdev->dev, "No space to add a new device");
+			ret = -ENOSPC;
+			goto err;
+		}
+		se_devices.desc[count].pf_type = pf_type;
+		se_devices.desc[count].num_queues = num_queues;
+		se_devices.desc[count++].dev = pdev;
+		atomic_inc(&se_devices.count);
+
+		if (atomic_read(&se_devices.count) == num_devices &&
+		    is_crypto_registered == false) {
+			if (cpt_register_algs()) {
+				dev_err(&pdev->dev,
+					"Error in registering crypto algorithms\n");
+				ret = -EINVAL;
+				goto err;
+			}
+			try_module_get(mod);
+			is_crypto_registered = true;
+		}
+		sort(se_devices.desc, count, sizeof(struct cpt_device_desc),
+		     compare_func, swap_func);
+		break;
+
+	case OTX_CPT_AE_TYPES:
+		count = atomic_read(&ae_devices.count);
+		if (count >= CPT_MAX_VF_NUM) {
+			dev_err(&pdev->dev, "No space to add a new device");
+			ret = -ENOSPC;
+			goto err;
+		}
+		ae_devices.desc[count].pf_type = pf_type;
+		ae_devices.desc[count].num_queues = num_queues;
+		ae_devices.desc[count++].dev = pdev;
+		atomic_inc(&ae_devices.count);
+		sort(ae_devices.desc, count, sizeof(struct cpt_device_desc),
+		     compare_func, swap_func);
+		break;
+
+	default:
+		dev_err(&pdev->dev, "Unknown VF type %d\n", engine_type);
+		ret = BAD_OTX_CPTVF_TYPE;
+	}
+err:
+	mutex_unlock(&mutex);
+	return ret;
+}
+
+void otx_cpt_crypto_exit(struct pci_dev *pdev, struct module *mod,
+			 enum otx_cptvf_type engine_type)
+{
+	struct cpt_device_table *dev_tbl;
+	bool dev_found = false;
+	int i, j, count;
+
+	mutex_lock(&mutex);
+
+	dev_tbl = (engine_type == OTX_CPT_AE_TYPES) ? &ae_devices : &se_devices;
+	count = atomic_read(&dev_tbl->count);
+	for (i = 0; i < count; i++)
+		if (pdev == dev_tbl->desc[i].dev) {
+			for (j = i; j < count-1; j++)
+				dev_tbl->desc[j] = dev_tbl->desc[j+1];
+			dev_found = true;
+			break;
+		}
+
+	if (!dev_found) {
+		dev_err(&pdev->dev, "%s device not found", __func__);
+		goto exit;
+	}
+
+	if (engine_type != OTX_CPT_AE_TYPES) {
+		if (atomic_dec_and_test(&se_devices.count) &&
+		    !is_any_alg_used()) {
+			cpt_unregister_algs();
+			module_put(mod);
+			is_crypto_registered = false;
+		}
+	} else
+		atomic_dec(&ae_devices.count);
+exit:
+	mutex_unlock(&mutex);
+}
diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.h b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.h
new file mode 100644
index 000000000000..67cc0025f5d5
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.h
@@ -0,0 +1,188 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Marvell OcteonTX CPT driver
+ *
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */ + +#ifndef __OTX_CPT_ALGS_H +#define __OTX_CPT_ALGS_H + +#include +#include "otx_cpt_common.h" + +#define OTX_CPT_MAX_ENC_KEY_SIZE 32 +#define OTX_CPT_MAX_HASH_KEY_SIZE 64 +#define OTX_CPT_MAX_KEY_SIZE (OTX_CPT_MAX_ENC_KEY_SIZE + \ + OTX_CPT_MAX_HASH_KEY_SIZE) +enum otx_cpt_request_type { + OTX_CPT_ENC_DEC_REQ = 0x1, + OTX_CPT_AEAD_ENC_DEC_REQ = 0x2, + OTX_CPT_AEAD_ENC_DEC_NULL_REQ = 0x3, + OTX_CPT_PASSTHROUGH_REQ = 0x4 +}; + +enum otx_cpt_major_opcodes { + OTX_CPT_MAJOR_OP_MISC = 0x01, + OTX_CPT_MAJOR_OP_FC = 0x33, + OTX_CPT_MAJOR_OP_HMAC = 0x35, +}; + +enum otx_cpt_req_type { + OTX_CPT_AE_CORE_REQ, + OTX_CPT_SE_CORE_REQ +}; + +enum otx_cpt_cipher_type { + OTX_CPT_CIPHER_NULL = 0x0, + OTX_CPT_DES3_CBC = 0x1, + OTX_CPT_DES3_ECB = 0x2, + OTX_CPT_AES_CBC = 0x3, + OTX_CPT_AES_ECB = 0x4, + OTX_CPT_AES_CFB = 0x5, + OTX_CPT_AES_CTR = 0x6, + OTX_CPT_AES_GCM = 0x7, + OTX_CPT_AES_XTS = 0x8 +}; + +enum otx_cpt_mac_type { + OTX_CPT_MAC_NULL = 0x0, + OTX_CPT_MD5 = 0x1, + OTX_CPT_SHA1 = 0x2, + OTX_CPT_SHA224 = 0x3, + OTX_CPT_SHA256 = 0x4, + OTX_CPT_SHA384 = 0x5, + OTX_CPT_SHA512 = 0x6, + OTX_CPT_GMAC = 0x7 +}; + +enum otx_cpt_aes_key_len { + OTX_CPT_AES_128_BIT = 0x1, + OTX_CPT_AES_192_BIT = 0x2, + OTX_CPT_AES_256_BIT = 0x3 +}; + +union otx_cpt_encr_ctrl { + u64 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 enc_cipher:4; + u64 reserved1:1; + u64 aes_key:2; + u64 iv_source:1; + u64 mac_type:4; + u64 reserved2:3; + u64 auth_input_type:1; + u64 mac_len:8; + u64 reserved3:8; + u64 encr_offset:16; + u64 iv_offset:8; + u64 auth_offset:8; +#else + u64 auth_offset:8; + u64 iv_offset:8; + u64 encr_offset:16; + u64 reserved3:8; + u64 mac_len:8; + u64 auth_input_type:1; + u64 reserved2:3; + u64 mac_type:4; + u64 iv_source:1; + u64 aes_key:2; + u64 reserved1:1; + u64 enc_cipher:4; +#endif + } e; +}; + +struct otx_cpt_cipher { + const char *name; + u8 value; +}; + +struct otx_cpt_enc_context { + union otx_cpt_encr_ctrl enc_ctrl; + u8 encr_key[32]; + u8 encr_iv[16]; +}; + +union otx_cpt_fchmac_ctx { + struct { + u8 ipad[64]; + u8 opad[64]; + } e; + struct { + u8 hmac_calc[64]; /* HMAC calculated */ + u8 hmac_recv[64]; /* HMAC received */ + } s; +}; + +struct otx_cpt_fc_ctx { + struct otx_cpt_enc_context enc; + union otx_cpt_fchmac_ctx hmac; +}; + +struct otx_cpt_enc_ctx { + u32 key_len; + u8 enc_key[OTX_CPT_MAX_KEY_SIZE]; + u8 cipher_type; + u8 key_type; +}; + +struct otx_cpt_des3_ctx { + u32 key_len; + u8 des3_key[OTX_CPT_MAX_KEY_SIZE]; +}; + +union otx_cpt_offset_ctrl_word { + u64 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved:32; + u64 enc_data_offset:16; + u64 iv_offset:8; + u64 auth_offset:8; +#else + u64 auth_offset:8; + u64 iv_offset:8; + u64 enc_data_offset:16; + u64 reserved:32; +#endif + } e; +}; + +struct otx_cpt_req_ctx { + struct otx_cpt_req_info cpt_req; + union otx_cpt_offset_ctrl_word ctrl_word; + struct otx_cpt_fc_ctx fctx; +}; + +struct otx_cpt_sdesc { + struct shash_desc shash; +}; + +struct otx_cpt_aead_ctx { + u8 key[OTX_CPT_MAX_KEY_SIZE]; + struct crypto_shash *hashalg; + struct otx_cpt_sdesc *sdesc; + u8 *ipad; + u8 *opad; + u32 enc_key_len; + u32 auth_key_len; + u8 cipher_type; + u8 mac_type; + u8 key_type; + u8 is_trunc_hmac; +}; +int otx_cpt_crypto_init(struct pci_dev *pdev, struct module *mod, + enum otx_cptpf_type pf_type, + enum otx_cptvf_type engine_type, + int num_queues, int num_devices); +void otx_cpt_crypto_exit(struct pci_dev *pdev, struct module *mod, + enum otx_cptvf_type engine_type); +void otx_cpt_callback(int status, void *arg, void 
*req);
+
+#endif /* __OTX_CPT_ALGS_H */
diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c
new file mode 100644
index 000000000000..a91860b5dc77
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c
@@ -0,0 +1,985 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTX CPT driver
+ *
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include "otx_cptvf.h"
+#include "otx_cptvf_algs.h"
+#include "otx_cptvf_reqmgr.h"
+
+#define DRV_NAME	"octeontx-cptvf"
+#define DRV_VERSION	"1.0"
+
+static void vq_work_handler(unsigned long data)
+{
+	struct otx_cptvf_wqe_info *cwqe_info =
+					(struct otx_cptvf_wqe_info *) data;
+
+	otx_cpt_post_process(&cwqe_info->vq_wqe[0]);
+}
+
+static int init_worker_threads(struct otx_cptvf *cptvf)
+{
+	struct pci_dev *pdev = cptvf->pdev;
+	struct otx_cptvf_wqe_info *cwqe_info;
+	int i;
+
+	cwqe_info = kzalloc(sizeof(*cwqe_info), GFP_KERNEL);
+	if (!cwqe_info)
+		return -ENOMEM;
+
+	if (cptvf->num_queues) {
+		dev_dbg(&pdev->dev, "Creating VQ worker threads (%d)\n",
+			cptvf->num_queues);
+	}
+
+	for (i = 0; i < cptvf->num_queues; i++) {
+		tasklet_init(&cwqe_info->vq_wqe[i].twork, vq_work_handler,
+			     (u64)cwqe_info);
+		cwqe_info->vq_wqe[i].cptvf = cptvf;
+	}
+	cptvf->wqe_info = cwqe_info;
+
+	return 0;
+}
+
+static void cleanup_worker_threads(struct otx_cptvf *cptvf)
+{
+	struct pci_dev *pdev = cptvf->pdev;
+	struct otx_cptvf_wqe_info *cwqe_info;
+	int i;
+
+	cwqe_info = (struct otx_cptvf_wqe_info *)cptvf->wqe_info;
+	if (!cwqe_info)
+		return;
+
+	if (cptvf->num_queues) {
+		dev_dbg(&pdev->dev, "Cleaning VQ worker threads (%u)\n",
+			cptvf->num_queues);
+	}
+
+	for (i = 0; i < cptvf->num_queues; i++)
+		tasklet_kill(&cwqe_info->vq_wqe[i].twork);
+
+	kzfree(cwqe_info);
+	cptvf->wqe_info = NULL;
+}
+
+static void free_pending_queues(struct otx_cpt_pending_qinfo *pqinfo)
+{
+	struct otx_cpt_pending_queue *queue;
+	int i;
+
+	for_each_pending_queue(pqinfo, queue, i) {
+		if (!queue->head)
+			continue;
+
+		/* free single queue */
+		kzfree((queue->head));
+		queue->front = 0;
+		queue->rear = 0;
+		queue->qlen = 0;
+	}
+	pqinfo->num_queues = 0;
+}
+
+static int alloc_pending_queues(struct otx_cpt_pending_qinfo *pqinfo, u32 qlen,
+				u32 num_queues)
+{
+	struct otx_cpt_pending_queue *queue = NULL;
+	size_t size;
+	int ret;
+	u32 i;
+
+	pqinfo->num_queues = num_queues;
+	size = (qlen * sizeof(struct otx_cpt_pending_entry));
+
+	for_each_pending_queue(pqinfo, queue, i) {
+		queue->head = kzalloc((size), GFP_KERNEL);
+		if (!queue->head) {
+			ret = -ENOMEM;
+			goto pending_qfail;
+		}
+
+		queue->pending_count = 0;
+		queue->front = 0;
+		queue->rear = 0;
+		queue->qlen = qlen;
+
+		/* init queue spin lock */
+		spin_lock_init(&queue->lock);
+	}
+	return 0;
+
+pending_qfail:
+	free_pending_queues(pqinfo);
+
+	return ret;
+}
+
+static int init_pending_queues(struct otx_cptvf *cptvf, u32 qlen,
+			       u32 num_queues)
+{
+	struct pci_dev *pdev = cptvf->pdev;
+	int ret;
+
+	if (!num_queues)
+		return 0;
+
+	ret = alloc_pending_queues(&cptvf->pqinfo, qlen, num_queues);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to setup pending queues (%u)\n",
+			num_queues);
+		return ret;
+	}
+	return 0;
+}
+
+static void cleanup_pending_queues(struct otx_cptvf *cptvf)
+{
+	struct pci_dev *pdev = cptvf->pdev;
+
+	if (!cptvf->num_queues)
+ return; + + dev_dbg(&pdev->dev, "Cleaning VQ pending queue (%u)\n", + cptvf->num_queues); + free_pending_queues(&cptvf->pqinfo); +} + +static void free_command_queues(struct otx_cptvf *cptvf, + struct otx_cpt_cmd_qinfo *cqinfo) +{ + struct otx_cpt_cmd_queue *queue = NULL; + struct otx_cpt_cmd_chunk *chunk = NULL; + struct pci_dev *pdev = cptvf->pdev; + int i; + + /* clean up for each queue */ + for (i = 0; i < cptvf->num_queues; i++) { + queue = &cqinfo->queue[i]; + + while (!list_empty(&cqinfo->queue[i].chead)) { + chunk = list_first_entry(&cqinfo->queue[i].chead, + struct otx_cpt_cmd_chunk, nextchunk); + + dma_free_coherent(&pdev->dev, chunk->size, + chunk->head, + chunk->dma_addr); + chunk->head = NULL; + chunk->dma_addr = 0; + list_del(&chunk->nextchunk); + kzfree(chunk); + } + queue->num_chunks = 0; + queue->idx = 0; + + } +} + +static int alloc_command_queues(struct otx_cptvf *cptvf, + struct otx_cpt_cmd_qinfo *cqinfo, + u32 qlen) +{ + struct otx_cpt_cmd_chunk *curr, *first, *last; + struct otx_cpt_cmd_queue *queue = NULL; + struct pci_dev *pdev = cptvf->pdev; + size_t q_size, c_size, rem_q_size; + u32 qcsize_bytes; + int i; + + + /* Qsize in dwords, needed for SADDR config, 1-next chunk pointer */ + cptvf->qsize = min(qlen, cqinfo->qchunksize) * + OTX_CPT_NEXT_CHUNK_PTR_SIZE + 1; + /* Qsize in bytes to create space for alignment */ + q_size = qlen * OTX_CPT_INST_SIZE; + + qcsize_bytes = cqinfo->qchunksize * OTX_CPT_INST_SIZE; + + /* per queue initialization */ + for (i = 0; i < cptvf->num_queues; i++) { + c_size = 0; + rem_q_size = q_size; + first = NULL; + last = NULL; + + queue = &cqinfo->queue[i]; + INIT_LIST_HEAD(&queue->chead); + do { + curr = kzalloc(sizeof(*curr), GFP_KERNEL); + if (!curr) + goto cmd_qfail; + + c_size = (rem_q_size > qcsize_bytes) ? 
qcsize_bytes : + rem_q_size; + curr->head = dma_alloc_coherent(&pdev->dev, + c_size + OTX_CPT_NEXT_CHUNK_PTR_SIZE, + &curr->dma_addr, GFP_KERNEL); + if (!curr->head) { + dev_err(&pdev->dev, + "Command Q (%d) chunk (%d) allocation failed\n", + i, queue->num_chunks); + goto free_curr; + } + curr->size = c_size; + + if (queue->num_chunks == 0) { + first = curr; + queue->base = first; + } + list_add_tail(&curr->nextchunk, + &cqinfo->queue[i].chead); + + queue->num_chunks++; + rem_q_size -= c_size; + if (last) + *((u64 *)(&last->head[last->size])) = + (u64)curr->dma_addr; + + last = curr; + } while (rem_q_size); + + /* + * Make the queue circular, tie back last chunk entry to head + */ + curr = first; + *((u64 *)(&last->head[last->size])) = (u64)curr->dma_addr; + queue->qhead = curr; + } + return 0; +free_curr: + kfree(curr); +cmd_qfail: + free_command_queues(cptvf, cqinfo); + return -ENOMEM; +} + +static int init_command_queues(struct otx_cptvf *cptvf, u32 qlen) +{ + struct pci_dev *pdev = cptvf->pdev; + int ret; + + /* setup command queues */ + ret = alloc_command_queues(cptvf, &cptvf->cqinfo, qlen); + if (ret) { + dev_err(&pdev->dev, "Failed to allocate command queues (%u)\n", + cptvf->num_queues); + return ret; + } + return ret; +} + +static void cleanup_command_queues(struct otx_cptvf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (!cptvf->num_queues) + return; + + dev_dbg(&pdev->dev, "Cleaning VQ command queue (%u)\n", + cptvf->num_queues); + free_command_queues(cptvf, &cptvf->cqinfo); +} + +static void cptvf_sw_cleanup(struct otx_cptvf *cptvf) +{ + cleanup_worker_threads(cptvf); + cleanup_pending_queues(cptvf); + cleanup_command_queues(cptvf); +} + +static int cptvf_sw_init(struct otx_cptvf *cptvf, u32 qlen, u32 num_queues) +{ + struct pci_dev *pdev = cptvf->pdev; + u32 max_dev_queues = 0; + int ret; + + max_dev_queues = OTX_CPT_NUM_QS_PER_VF; + /* possible cpus */ + num_queues = min_t(u32, num_queues, max_dev_queues); + cptvf->num_queues = num_queues; + + ret = init_command_queues(cptvf, qlen); + if (ret) { + dev_err(&pdev->dev, "Failed to setup command queues (%u)\n", + num_queues); + return ret; + } + + ret = init_pending_queues(cptvf, qlen, num_queues); + if (ret) { + dev_err(&pdev->dev, "Failed to setup pending queues (%u)\n", + num_queues); + goto setup_pqfail; + } + + /* Create worker threads for BH processing */ + ret = init_worker_threads(cptvf); + if (ret) { + dev_err(&pdev->dev, "Failed to setup worker threads\n"); + goto init_work_fail; + } + return 0; + +init_work_fail: + cleanup_worker_threads(cptvf); + cleanup_pending_queues(cptvf); + +setup_pqfail: + cleanup_command_queues(cptvf); + + return ret; +} + +static void cptvf_free_irq_affinity(struct otx_cptvf *cptvf, int vec) +{ + irq_set_affinity_hint(pci_irq_vector(cptvf->pdev, vec), NULL); + free_cpumask_var(cptvf->affinity_mask[vec]); +} + +static void cptvf_write_vq_ctl(struct otx_cptvf *cptvf, bool val) +{ + union otx_cptx_vqx_ctl vqx_ctl; + + vqx_ctl.u = readq(cptvf->reg_base + OTX_CPT_VQX_CTL(0)); + vqx_ctl.s.ena = val; + writeq(vqx_ctl.u, cptvf->reg_base + OTX_CPT_VQX_CTL(0)); +} + +void otx_cptvf_write_vq_doorbell(struct otx_cptvf *cptvf, u32 val) +{ + union otx_cptx_vqx_doorbell vqx_dbell; + + vqx_dbell.u = readq(cptvf->reg_base + OTX_CPT_VQX_DOORBELL(0)); + vqx_dbell.s.dbell_cnt = val * 8; /* Num of Instructions * 8 words */ + writeq(vqx_dbell.u, cptvf->reg_base + OTX_CPT_VQX_DOORBELL(0)); +} + +static void cptvf_write_vq_inprog(struct otx_cptvf *cptvf, u8 val) +{ + union otx_cptx_vqx_inprog vqx_inprg; + + 
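+	/* Read-modify-write: fetch the VQ in-progress register, update
+	 * only the inflight field and write the whole value back.
+	 */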
vqx_inprg.u = readq(cptvf->reg_base + OTX_CPT_VQX_INPROG(0)); + vqx_inprg.s.inflight = val; + writeq(vqx_inprg.u, cptvf->reg_base + OTX_CPT_VQX_INPROG(0)); +} + +static void cptvf_write_vq_done_numwait(struct otx_cptvf *cptvf, u32 val) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + vqx_dwait.s.num_wait = val; + writeq(vqx_dwait.u, cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); +} + +static u32 cptvf_read_vq_done_numwait(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + return vqx_dwait.s.num_wait; +} + +static void cptvf_write_vq_done_timewait(struct otx_cptvf *cptvf, u16 time) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + vqx_dwait.s.time_wait = time; + writeq(vqx_dwait.u, cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); +} + + +static u16 cptvf_read_vq_done_timewait(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + return vqx_dwait.s.time_wait; +} + +static void cptvf_enable_swerr_interrupts(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_ena_w1s vqx_misc_ena; + + vqx_misc_ena.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); + /* Enable SWERR interrupts for the requested VF */ + vqx_misc_ena.s.swerr = 1; + writeq(vqx_misc_ena.u, cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); +} + +static void cptvf_enable_mbox_interrupts(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_ena_w1s vqx_misc_ena; + + vqx_misc_ena.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); + /* Enable MBOX interrupt for the requested VF */ + vqx_misc_ena.s.mbox = 1; + writeq(vqx_misc_ena.u, cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); +} + +static void cptvf_enable_done_interrupts(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done_ena_w1s vqx_done_ena; + + vqx_done_ena.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_ENA_W1S(0)); + /* Enable DONE interrupt for the requested VF */ + vqx_done_ena.s.done = 1; + writeq(vqx_done_ena.u, cptvf->reg_base + OTX_CPT_VQX_DONE_ENA_W1S(0)); +} + +static void cptvf_clear_dovf_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.dovf = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_irde_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.irde = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_nwrp_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.nwrp = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_mbox_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.mbox = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_swerr_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + 
vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.swerr = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static u64 cptvf_read_vf_misc_intr_status(struct otx_cptvf *cptvf) +{ + return readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static irqreturn_t cptvf_misc_intr_handler(int __always_unused irq, + void *arg) +{ + struct otx_cptvf *cptvf = arg; + struct pci_dev *pdev = cptvf->pdev; + u64 intr; + + intr = cptvf_read_vf_misc_intr_status(cptvf); + /* Check for MISC interrupt types */ + if (likely(intr & OTX_CPT_VF_INTR_MBOX_MASK)) { + dev_dbg(&pdev->dev, "Mailbox interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + otx_cptvf_handle_mbox_intr(cptvf); + cptvf_clear_mbox_intr(cptvf); + } else if (unlikely(intr & OTX_CPT_VF_INTR_DOVF_MASK)) { + cptvf_clear_dovf_intr(cptvf); + /* Clear doorbell count */ + otx_cptvf_write_vq_doorbell(cptvf, 0); + dev_err(&pdev->dev, + "Doorbell overflow error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & OTX_CPT_VF_INTR_IRDE_MASK)) { + cptvf_clear_irde_intr(cptvf); + dev_err(&pdev->dev, + "Instruction NCB read error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & OTX_CPT_VF_INTR_NWRP_MASK)) { + cptvf_clear_nwrp_intr(cptvf); + dev_err(&pdev->dev, + "NCB response write error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & OTX_CPT_VF_INTR_SERR_MASK)) { + cptvf_clear_swerr_intr(cptvf); + dev_err(&pdev->dev, + "Software error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else { + dev_err(&pdev->dev, "Unhandled interrupt in OTX_CPT VF %d\n", + cptvf->vfid); + } + + return IRQ_HANDLED; +} + +static inline struct otx_cptvf_wqe *get_cptvf_vq_wqe(struct otx_cptvf *cptvf, + int qno) +{ + struct otx_cptvf_wqe_info *nwqe_info; + + if (unlikely(qno >= cptvf->num_queues)) + return NULL; + nwqe_info = (struct otx_cptvf_wqe_info *)cptvf->wqe_info; + + return &nwqe_info->vq_wqe[qno]; +} + +static inline u32 cptvf_read_vq_done_count(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done vqx_done; + + vqx_done.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE(0)); + return vqx_done.s.done; +} + +static inline void cptvf_write_vq_done_ack(struct otx_cptvf *cptvf, + u32 ackcnt) +{ + union otx_cptx_vqx_done_ack vqx_dack_cnt; + + vqx_dack_cnt.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_ACK(0)); + vqx_dack_cnt.s.done_ack = ackcnt; + writeq(vqx_dack_cnt.u, cptvf->reg_base + OTX_CPT_VQX_DONE_ACK(0)); +} + +static irqreturn_t cptvf_done_intr_handler(int __always_unused irq, + void *cptvf_dev) +{ + struct otx_cptvf *cptvf = (struct otx_cptvf *)cptvf_dev; + struct pci_dev *pdev = cptvf->pdev; + /* Read the number of completions */ + u32 intr = cptvf_read_vq_done_count(cptvf); + + if (intr) { + struct otx_cptvf_wqe *wqe; + + /* + * Acknowledge the number of scheduled completions for + * processing + */ + cptvf_write_vq_done_ack(cptvf, intr); + wqe = get_cptvf_vq_wqe(cptvf, 0); + if (unlikely(!wqe)) { + dev_err(&pdev->dev, "No work to schedule for VF (%d)", + cptvf->vfid); + return IRQ_NONE; + } + tasklet_hi_schedule(&wqe->twork); + } + + return IRQ_HANDLED; +} + +static void cptvf_set_irq_affinity(struct otx_cptvf *cptvf, int vec) +{ + struct pci_dev *pdev = cptvf->pdev; + int cpu; + + if (!zalloc_cpumask_var(&cptvf->affinity_mask[vec], + GFP_KERNEL)) { + dev_err(&pdev->dev, + "Allocation failed for affinity_mask for VF %d", + cptvf->vfid); + return; + } + + cpu = 
cptvf->vfid % num_online_cpus(); + cpumask_set_cpu(cpumask_local_spread(cpu, cptvf->node), + cptvf->affinity_mask[vec]); + irq_set_affinity_hint(pci_irq_vector(pdev, vec), + cptvf->affinity_mask[vec]); +} + +static void cptvf_write_vq_saddr(struct otx_cptvf *cptvf, u64 val) +{ + union otx_cptx_vqx_saddr vqx_saddr; + + vqx_saddr.u = val; + writeq(vqx_saddr.u, cptvf->reg_base + OTX_CPT_VQX_SADDR(0)); +} + +static void cptvf_device_init(struct otx_cptvf *cptvf) +{ + u64 base_addr = 0; + + /* Disable the VQ */ + cptvf_write_vq_ctl(cptvf, 0); + /* Reset the doorbell */ + otx_cptvf_write_vq_doorbell(cptvf, 0); + /* Clear inflight */ + cptvf_write_vq_inprog(cptvf, 0); + /* Write VQ SADDR */ + base_addr = (u64)(cptvf->cqinfo.queue[0].qhead->dma_addr); + cptvf_write_vq_saddr(cptvf, base_addr); + /* Configure timerhold / coalescence */ + cptvf_write_vq_done_timewait(cptvf, OTX_CPT_TIMER_HOLD); + cptvf_write_vq_done_numwait(cptvf, OTX_CPT_COUNT_HOLD); + /* Enable the VQ */ + cptvf_write_vq_ctl(cptvf, 1); + /* Flag the VF ready */ + cptvf->flags |= OTX_CPT_FLAG_DEVICE_READY; +} + +static ssize_t vf_type_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + char *msg; + + switch (cptvf->vftype) { + case OTX_CPT_AE_TYPES: + msg = "AE"; + break; + + case OTX_CPT_SE_TYPES: + msg = "SE"; + break; + + default: + msg = "Invalid"; + } + + return scnprintf(buf, PAGE_SIZE, "%s\n", msg); +} + +static ssize_t vf_engine_group_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", cptvf->vfgrp); +} + +static ssize_t vf_engine_group_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + int val, ret; + + ret = kstrtoint(buf, 10, &val); + if (ret) + return ret; + + if (val < 0) + return -EINVAL; + + if (val >= OTX_CPT_MAX_ENGINE_GROUPS) { + dev_err(dev, "Engine group >= than max available groups %d", + OTX_CPT_MAX_ENGINE_GROUPS); + return -EINVAL; + } + + ret = otx_cptvf_send_vf_to_grp_msg(cptvf, val); + if (ret) + return ret; + + return count; +} + +static ssize_t vf_coalesc_time_wait_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + cptvf_read_vq_done_timewait(cptvf)); +} + +static ssize_t vf_coalesc_num_wait_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + cptvf_read_vq_done_numwait(cptvf)); +} + +static ssize_t vf_coalesc_time_wait_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + long val; + int ret; + + ret = kstrtol(buf, 10, &val); + if (ret != 0) + return ret; + + if (val < OTX_CPT_COALESC_MIN_TIME_WAIT || + val > OTX_CPT_COALESC_MAX_TIME_WAIT) + return -EINVAL; + + cptvf_write_vq_done_timewait(cptvf, val); + return count; +} + +static ssize_t vf_coalesc_num_wait_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + long val; + int ret; + + ret = kstrtol(buf, 10, &val); + if (ret != 0) + return ret; + + if (val < OTX_CPT_COALESC_MIN_NUM_WAIT || + val > OTX_CPT_COALESC_MAX_NUM_WAIT) + return -EINVAL; + + 
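+	/* Value is within the allowed coalescing range; program it into
+	 * the VQ DONE_WAIT num_wait field.
+	 */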
cptvf_write_vq_done_numwait(cptvf, val);
+	return count;
+}
+
+static DEVICE_ATTR_RO(vf_type);
+static DEVICE_ATTR_RW(vf_engine_group);
+static DEVICE_ATTR_RW(vf_coalesc_time_wait);
+static DEVICE_ATTR_RW(vf_coalesc_num_wait);
+
+static struct attribute *otx_cptvf_attrs[] = {
+	&dev_attr_vf_type.attr,
+	&dev_attr_vf_engine_group.attr,
+	&dev_attr_vf_coalesc_time_wait.attr,
+	&dev_attr_vf_coalesc_num_wait.attr,
+	NULL
+};
+
+static const struct attribute_group otx_cptvf_sysfs_group = {
+	.attrs = otx_cptvf_attrs,
+};
+
+static int otx_cptvf_probe(struct pci_dev *pdev,
+			   const struct pci_device_id *ent)
+{
+	struct device *dev = &pdev->dev;
+	struct otx_cptvf *cptvf;
+	int err;
+
+	cptvf = devm_kzalloc(dev, sizeof(*cptvf), GFP_KERNEL);
+	if (!cptvf)
+		return -ENOMEM;
+
+	pci_set_drvdata(pdev, cptvf);
+	cptvf->pdev = pdev;
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_err(dev, "Failed to enable PCI device\n");
+		goto clear_drvdata;
+	}
+	err = pci_request_regions(pdev, DRV_NAME);
+	if (err) {
+		dev_err(dev, "PCI request regions failed 0x%x\n", err);
+		goto disable_device;
+	}
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+	if (err) {
+		dev_err(dev, "Unable to get usable DMA configuration\n");
+		goto release_regions;
+	}
+
+	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
+	if (err) {
+		dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n");
+		goto release_regions;
+	}
+
+	/* MAP PF's configuration registers */
+	cptvf->reg_base = pci_iomap(pdev, OTX_CPT_VF_PCI_CFG_BAR, 0);
+	if (!cptvf->reg_base) {
+		dev_err(dev, "Cannot map config register space, aborting\n");
+		err = -ENOMEM;
+		goto release_regions;
+	}
+
+	cptvf->node = dev_to_node(&pdev->dev);
+	err = pci_alloc_irq_vectors(pdev, OTX_CPT_VF_MSIX_VECTORS,
+				    OTX_CPT_VF_MSIX_VECTORS, PCI_IRQ_MSIX);
+	if (err < 0) {
+		dev_err(dev, "Request for #%d msix vectors failed\n",
+			OTX_CPT_VF_MSIX_VECTORS);
+		goto unmap_region;
+	}
+
+	err = request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC),
+			  cptvf_misc_intr_handler, 0, "CPT VF misc intr",
+			  cptvf);
+	if (err) {
+		dev_err(dev, "Failed to request misc irq\n");
+		goto free_vectors;
+	}
+
+	/* Enable mailbox interrupt */
+	cptvf_enable_mbox_interrupts(cptvf);
+	cptvf_enable_swerr_interrupts(cptvf);
+
+	/* Check cpt pf status, gets chip ID / device Id from PF if ready */
+	err = otx_cptvf_check_pf_ready(cptvf);
+	if (err)
+		goto free_misc_irq;
+
+	/* CPT VF software resources initialization */
+	cptvf->cqinfo.qchunksize = OTX_CPT_CMD_QCHUNK_SIZE;
+	err = cptvf_sw_init(cptvf, OTX_CPT_CMD_QLEN, OTX_CPT_NUM_QS_PER_VF);
+	if (err) {
+		dev_err(dev, "cptvf_sw_init() failed\n");
+		goto free_misc_irq;
+	}
+	/* Convey VQ LEN to PF */
+	err = otx_cptvf_send_vq_size_msg(cptvf);
+	if (err)
+		goto sw_cleanup;
+
+	/* CPT VF device initialization */
+	cptvf_device_init(cptvf);
+	/* Send msg to PF to assign current Q to required group */
+	err = otx_cptvf_send_vf_to_grp_msg(cptvf, cptvf->vfgrp);
+	if (err)
+		goto sw_cleanup;
+
+	cptvf->priority = 1;
+	err = otx_cptvf_send_vf_priority_msg(cptvf);
+	if (err)
+		goto sw_cleanup;
+
+	err = request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE),
+			  cptvf_done_intr_handler, 0, "CPT VF done intr",
+			  cptvf);
+	if (err) {
+		dev_err(dev, "Failed to request done irq\n");
+		goto sw_cleanup;
+	}
+
+	/* Enable done interrupt */
+	cptvf_enable_done_interrupts(cptvf);
+
+	/* Set irq affinity masks */
+	cptvf_set_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC);
+	cptvf_set_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE);
+
+	err = otx_cptvf_send_vf_up(cptvf);
+	if (err)
+		goto free_irq_affinity;
+
+	/* Initialize algorithms and set ops */
+	err = otx_cpt_crypto_init(pdev, THIS_MODULE,
+		    cptvf->vftype == OTX_CPT_SE_TYPES ? OTX_CPT_SE : OTX_CPT_AE,
+		    cptvf->vftype, 1, cptvf->num_vfs);
+	if (err) {
+		dev_err(dev, "Failed to register crypto algs\n");
+		goto free_irq_affinity;
+	}
+
+	err = sysfs_create_group(&dev->kobj, &otx_cptvf_sysfs_group);
+	if (err) {
+		dev_err(dev, "Creating sysfs entries failed\n");
+		goto crypto_exit;
+	}
+
+	return 0;
+
+crypto_exit:
+	otx_cpt_crypto_exit(pdev, THIS_MODULE, cptvf->vftype);
+free_irq_affinity:
+	cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE);
+	cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC);
+	free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), cptvf);
+sw_cleanup:
+	cptvf_sw_cleanup(cptvf);
+free_misc_irq:
+	free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), cptvf);
+free_vectors:
+	pci_free_irq_vectors(cptvf->pdev);
+unmap_region:
+	pci_iounmap(pdev, cptvf->reg_base);
+release_regions:
+	pci_release_regions(pdev);
+disable_device:
+	pci_disable_device(pdev);
+clear_drvdata:
+	pci_set_drvdata(pdev, NULL);
+
+	return err;
+}
+
+static void otx_cptvf_remove(struct pci_dev *pdev)
+{
+	struct otx_cptvf *cptvf = pci_get_drvdata(pdev);
+
+	if (!cptvf) {
+		dev_err(&pdev->dev, "Invalid CPT-VF device\n");
+		return;
+	}
+
+	/* Convey DOWN to PF */
+	if (otx_cptvf_send_vf_down(cptvf)) {
+		dev_err(&pdev->dev, "PF not responding to DOWN msg");
+	} else {
+		sysfs_remove_group(&pdev->dev.kobj, &otx_cptvf_sysfs_group);
+		otx_cpt_crypto_exit(pdev, THIS_MODULE, cptvf->vftype);
+		cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE);
+		cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC);
+		free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), cptvf);
+		free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), cptvf);
+		cptvf_sw_cleanup(cptvf);
+		pci_free_irq_vectors(cptvf->pdev);
+		pci_iounmap(pdev, cptvf->reg_base);
+		pci_release_regions(pdev);
+		pci_disable_device(pdev);
+		pci_set_drvdata(pdev, NULL);
+	}
+}
+
+/* Supported devices */
+static const struct pci_device_id otx_cptvf_id_table[] = {
+	{PCI_VDEVICE(CAVIUM, OTX_CPT_PCI_VF_DEVICE_ID), 0},
+	{ 0, } /* end of table */
+};
+
+static struct pci_driver otx_cptvf_pci_driver = {
+	.name = DRV_NAME,
+	.id_table = otx_cptvf_id_table,
+	.probe = otx_cptvf_probe,
+	.remove = otx_cptvf_remove,
+};
+
+module_pci_driver(otx_cptvf_pci_driver);
+
+MODULE_AUTHOR("Marvell International Ltd.");
+MODULE_DESCRIPTION("Marvell OcteonTX CPT Virtual Function Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRV_VERSION);
+MODULE_DEVICE_TABLE(pci, otx_cptvf_id_table);
diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c b/drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c
new file mode 100644
index 000000000000..5663787c7a62
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTX CPT driver
+ *
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include "otx_cptvf.h"
+
+#define CPT_MBOX_MSG_TIMEOUT 2000
+
+static char *get_mbox_opcode_str(int msg_opcode)
+{
+	char *str = "Unknown";
+
+	switch (msg_opcode) {
+	case OTX_CPT_MSG_VF_UP:
+		str = "UP";
+		break;
+
+	case OTX_CPT_MSG_VF_DOWN:
+		str = "DOWN";
+		break;
+
+	case OTX_CPT_MSG_READY:
+		str = "READY";
+		break;
+
+	case OTX_CPT_MSG_QLEN:
+		str = "QLEN";
+		break;
+
+	case OTX_CPT_MSG_QBIND_GRP:
+		str = "QBIND_GRP";
+		break;
+
+	case OTX_CPT_MSG_VQ_PRIORITY:
+		str = "VQ_PRIORITY";
+		break;
+
+	case OTX_CPT_MSG_PF_TYPE:
+		str = "PF_TYPE";
+		break;
+
+	case OTX_CPT_MSG_ACK:
+		str = "ACK";
+		break;
+
+	case OTX_CPT_MSG_NACK:
+		str = "NACK";
+		break;
+	}
+	return str;
+}
+
+static void dump_mbox_msg(struct otx_cpt_mbox *mbox_msg, int vf_id)
+{
+	char raw_data_str[OTX_CPT_MAX_MBOX_DATA_STR_SIZE];
+
+	hex_dump_to_buffer(mbox_msg, sizeof(struct otx_cpt_mbox), 16, 8,
+			   raw_data_str, OTX_CPT_MAX_MBOX_DATA_STR_SIZE, false);
+	if (vf_id >= 0)
+		pr_debug("MBOX msg %s received from VF%d raw_data %s",
+			 get_mbox_opcode_str(mbox_msg->msg), vf_id,
+			 raw_data_str);
+	else
+		pr_debug("MBOX msg %s received from PF raw_data %s",
+			 get_mbox_opcode_str(mbox_msg->msg), raw_data_str);
+}
+
+static void cptvf_send_msg_to_pf(struct otx_cptvf *cptvf,
+				 struct otx_cpt_mbox *mbx)
+{
+	/* Writing mbox(1) causes interrupt */
+	writeq(mbx->msg, cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 0));
+	writeq(mbx->data, cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 1));
+}
+
+/* Interrupt handler to handle mailbox messages from the PF */
+void otx_cptvf_handle_mbox_intr(struct otx_cptvf *cptvf)
+{
+	struct otx_cpt_mbox mbx = {};
+
+	/*
+	 * MBOX[0] contains msg
+	 * MBOX[1] contains data
+	 */
+	mbx.msg  = readq(cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 0));
+	mbx.data = readq(cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 1));
+
+	dump_mbox_msg(&mbx, -1);
+
+	switch (mbx.msg) {
+	case OTX_CPT_MSG_VF_UP:
+		cptvf->pf_acked = true;
+		cptvf->num_vfs = mbx.data;
+		break;
+	case OTX_CPT_MSG_READY:
+		cptvf->pf_acked = true;
+		cptvf->vfid = mbx.data;
+		dev_dbg(&cptvf->pdev->dev, "Received VFID %d\n", cptvf->vfid);
+		break;
+	case OTX_CPT_MSG_QBIND_GRP:
+		cptvf->pf_acked = true;
+		cptvf->vftype = mbx.data;
+		dev_dbg(&cptvf->pdev->dev, "VF %d type %s group %d\n",
+			cptvf->vfid,
+			((mbx.data == OTX_CPT_SE_TYPES) ? "SE" : "AE"),
+			cptvf->vfgrp);
+		break;
+	case OTX_CPT_MSG_ACK:
+		cptvf->pf_acked = true;
+		break;
+	case OTX_CPT_MSG_NACK:
+		cptvf->pf_nacked = true;
+		break;
+	default:
+		dev_err(&cptvf->pdev->dev, "Invalid msg from PF, msg 0x%llx\n",
+			mbx.msg);
+		break;
+	}
+}
+
+static int cptvf_send_msg_to_pf_timeout(struct otx_cptvf *cptvf,
+					struct otx_cpt_mbox *mbx)
+{
+	int timeout = CPT_MBOX_MSG_TIMEOUT;
+	int sleep = 10;
+
+	cptvf->pf_acked = false;
+	cptvf->pf_nacked = false;
+	cptvf_send_msg_to_pf(cptvf, mbx);
+	/* Wait for previous message to be acked, timeout 2sec */
+	while (!cptvf->pf_acked) {
+		if (cptvf->pf_nacked)
+			return -EINVAL;
+		msleep(sleep);
+		if (cptvf->pf_acked)
+			break;
+		timeout -= sleep;
+		if (!timeout) {
+			dev_err(&cptvf->pdev->dev,
+				"PF didn't ack to mbox msg %llx from VF%u\n",
+				mbx->msg, cptvf->vfid);
+			return -EBUSY;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Checks if the VF is able to communicate with the PF
+ * and also gets the CPT number this VF is associated to.
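+ * The READY reply from the PF carries this VF's number, which the
+ * mailbox interrupt handler stores in cptvf->vfid.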
+ */
+int otx_cptvf_check_pf_ready(struct otx_cptvf *cptvf)
+{
+	struct otx_cpt_mbox mbx = {};
+	int ret;
+
+	mbx.msg = OTX_CPT_MSG_READY;
+	ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx);
+
+	return ret;
+}
+
+/*
+ * Communicate VQs size to PF to program CPT(0)_PF_Q(0-15)_CTL of the VF.
+ * Must be ACKed.
+ */
+int otx_cptvf_send_vq_size_msg(struct otx_cptvf *cptvf)
+{
+	struct otx_cpt_mbox mbx = {};
+	int ret;
+
+	mbx.msg = OTX_CPT_MSG_QLEN;
+	mbx.data = cptvf->qsize;
+	ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx);
+
+	return ret;
+}
+
+/*
+ * Communicate the VF group required to the PF and get the VQ bound to
+ * that group.
+ */
+int otx_cptvf_send_vf_to_grp_msg(struct otx_cptvf *cptvf, int group)
+{
+	struct otx_cpt_mbox mbx = {};
+	int ret;
+
+	mbx.msg = OTX_CPT_MSG_QBIND_GRP;
+	/* Convey group of the VF */
+	mbx.data = group;
+	ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx);
+	if (ret)
+		return ret;
+	cptvf->vfgrp = group;
+
+	return 0;
+}
+
+/*
+ * Communicate the VF's queue priority to the PF. Must be ACKed.
+ */
+int otx_cptvf_send_vf_priority_msg(struct otx_cptvf *cptvf)
+{
+	struct otx_cpt_mbox mbx = {};
+	int ret;
+
+	mbx.msg = OTX_CPT_MSG_VQ_PRIORITY;
+	/* Convey priority of the VF's queue */
+	mbx.data = cptvf->priority;
+	ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx);
+
+	return ret;
+}
+
+/*
+ * Communicate to the PF that the VF is UP and running
+ */
+int otx_cptvf_send_vf_up(struct otx_cptvf *cptvf)
+{
+	struct otx_cpt_mbox mbx = {};
+	int ret;
+
+	mbx.msg = OTX_CPT_MSG_VF_UP;
+	ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx);
+
+	return ret;
+}
+
+/*
+ * Communicate to the PF that the VF is going DOWN
+ */
+int otx_cptvf_send_vf_down(struct otx_cptvf *cptvf)
+{
+	struct otx_cpt_mbox mbx = {};
+	int ret;
+
+	mbx.msg = OTX_CPT_MSG_VF_DOWN;
+	ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx);
+
+	return ret;
+}
diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c
new file mode 100644
index 000000000000..df839b880354
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c
@@ -0,0 +1,612 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTX CPT driver
+ *
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */ + +#include "otx_cptvf.h" +#include "otx_cptvf_algs.h" + +/* Completion code size and initial value */ +#define COMPLETION_CODE_SIZE 8 +#define COMPLETION_CODE_INIT 0 + +/* SG list header size in bytes */ +#define SG_LIST_HDR_SIZE 8 + +/* Default timeout when waiting for free pending entry in us */ +#define CPT_PENTRY_TIMEOUT 1000 +#define CPT_PENTRY_STEP 50 + +/* Default threshold for stopping and resuming sender requests */ +#define CPT_IQ_STOP_MARGIN 128 +#define CPT_IQ_RESUME_MARGIN 512 + +#define CPT_DMA_ALIGN 128 + +void otx_cpt_dump_sg_list(struct pci_dev *pdev, struct otx_cpt_req_info *req) +{ + int i; + + pr_debug("Gather list size %d\n", req->incnt); + for (i = 0; i < req->incnt; i++) { + pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i, + req->in[i].size, req->in[i].vptr, + (void *) req->in[i].dma_addr); + pr_debug("Buffer hexdump (%d bytes)\n", + req->in[i].size); + print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, + req->in[i].vptr, req->in[i].size, false); + } + + pr_debug("Scatter list size %d\n", req->outcnt); + for (i = 0; i < req->outcnt; i++) { + pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i, + req->out[i].size, req->out[i].vptr, + (void *) req->out[i].dma_addr); + pr_debug("Buffer hexdump (%d bytes)\n", req->out[i].size); + print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, + req->out[i].vptr, req->out[i].size, false); + } +} + +static inline struct otx_cpt_pending_entry *get_free_pending_entry( + struct otx_cpt_pending_queue *q, + int qlen) +{ + struct otx_cpt_pending_entry *ent = NULL; + + ent = &q->head[q->rear]; + if (unlikely(ent->busy)) + return NULL; + + q->rear++; + if (unlikely(q->rear == qlen)) + q->rear = 0; + + return ent; +} + +static inline u32 modulo_inc(u32 index, u32 length, u32 inc) +{ + if (WARN_ON(inc > length)) + inc = length; + + index += inc; + if (unlikely(index >= length)) + index -= length; + + return index; +} + +static inline void free_pentry(struct otx_cpt_pending_entry *pentry) +{ + pentry->completion_addr = NULL; + pentry->info = NULL; + pentry->callback = NULL; + pentry->areq = NULL; + pentry->resume_sender = false; + pentry->busy = false; +} + +static inline int setup_sgio_components(struct pci_dev *pdev, + struct otx_cpt_buf_ptr *list, + int buf_count, u8 *buffer) +{ + struct otx_cpt_sglist_component *sg_ptr = NULL; + int ret = 0, i, j; + int components; + + if (unlikely(!list)) { + dev_err(&pdev->dev, "Input list pointer is NULL\n"); + return -EFAULT; + } + + for (i = 0; i < buf_count; i++) { + if (likely(list[i].vptr)) { + list[i].dma_addr = dma_map_single(&pdev->dev, + list[i].vptr, + list[i].size, + DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(&pdev->dev, + list[i].dma_addr))) { + dev_err(&pdev->dev, "Dma mapping failed\n"); + ret = -EIO; + goto sg_cleanup; + } + } + } + + components = buf_count / 4; + sg_ptr = (struct otx_cpt_sglist_component *)buffer; + for (i = 0; i < components; i++) { + sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size); + sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size); + sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size); + sg_ptr->u.s.len3 = cpu_to_be16(list[i * 4 + 3].size); + sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr); + sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr); + sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr); + sg_ptr->ptr3 = cpu_to_be64(list[i * 4 + 3].dma_addr); + sg_ptr++; + } + components = buf_count % 4; + + switch (components) { + case 3: + sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size); + sg_ptr->ptr2 = cpu_to_be64(list[i 
* 4 + 2].dma_addr);
+		/* Fall through */
+	case 2:
+		sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size);
+		sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr);
+		/* Fall through */
+	case 1:
+		sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size);
+		sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr);
+		break;
+	default:
+		break;
+	}
+	return ret;
+
+sg_cleanup:
+	for (j = 0; j < i; j++) {
+		if (list[j].dma_addr) {
+			dma_unmap_single(&pdev->dev, list[j].dma_addr,
+					 list[j].size, DMA_BIDIRECTIONAL);
+		}
+
+		list[j].dma_addr = 0;
+	}
+	return ret;
+}
+
+static inline int setup_sgio_list(struct pci_dev *pdev,
+				  struct otx_cpt_info_buffer **pinfo,
+				  struct otx_cpt_req_info *req, gfp_t gfp)
+{
+	u32 dlen, align_dlen, info_len, rlen;
+	struct otx_cpt_info_buffer *info;
+	u16 g_sz_bytes, s_sz_bytes;
+	int align = CPT_DMA_ALIGN;
+	u32 total_mem_len;
+
+	if (unlikely(req->incnt > OTX_CPT_MAX_SG_IN_CNT ||
+		     req->outcnt > OTX_CPT_MAX_SG_OUT_CNT)) {
+		dev_err(&pdev->dev, "Error too many sg components\n");
+		return -EINVAL;
+	}
+
+	g_sz_bytes = ((req->incnt + 3) / 4) *
+		      sizeof(struct otx_cpt_sglist_component);
+	s_sz_bytes = ((req->outcnt + 3) / 4) *
+		      sizeof(struct otx_cpt_sglist_component);
+
+	dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE;
+	align_dlen = ALIGN(dlen, align);
+	info_len = ALIGN(sizeof(*info), align);
+	rlen = ALIGN(sizeof(union otx_cpt_res_s), align);
+	total_mem_len = align_dlen + info_len + rlen + COMPLETION_CODE_SIZE;
+
+	info = kzalloc(total_mem_len, gfp);
+	if (unlikely(!info)) {
+		dev_err(&pdev->dev, "Memory allocation failed\n");
+		return -ENOMEM;
+	}
+	*pinfo = info;
+	info->dlen = dlen;
+	info->in_buffer = (u8 *)info + info_len;
+
+	((u16 *)info->in_buffer)[0] = req->outcnt;
+	((u16 *)info->in_buffer)[1] = req->incnt;
+	((u16 *)info->in_buffer)[2] = 0;
+	((u16 *)info->in_buffer)[3] = 0;
+	*(u64 *)info->in_buffer = cpu_to_be64p((u64 *)info->in_buffer);
+
+	/* Setup gather (input) components */
+	if (setup_sgio_components(pdev, req->in, req->incnt,
+				  &info->in_buffer[8])) {
+		dev_err(&pdev->dev, "Failed to setup gather list\n");
+		return -EFAULT;
+	}
+
+	if (setup_sgio_components(pdev, req->out, req->outcnt,
+				  &info->in_buffer[8 + g_sz_bytes])) {
+		dev_err(&pdev->dev, "Failed to setup scatter list\n");
+		return -EFAULT;
+	}
+
+	info->dma_len = total_mem_len - info_len;
+	info->dptr_baddr = dma_map_single(&pdev->dev, (void *)info->in_buffer,
+					  info->dma_len, DMA_BIDIRECTIONAL);
+	if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) {
+		dev_err(&pdev->dev, "DMA Mapping failed for cpt req\n");
+		return -EIO;
+	}
+	/*
+	 * Get buffer for union otx_cpt_res_s response
+	 * structure and its physical address
+	 */
+	info->completion_addr = (u64 *)(info->in_buffer + align_dlen);
+	info->comp_baddr = info->dptr_baddr + align_dlen;
+
+	/* Create and initialize RPTR */
+	info->out_buffer = (u8 *)info->completion_addr + rlen;
+	info->rptr_baddr = info->comp_baddr + rlen;
+
+	*((u64 *) info->out_buffer) = ~((u64) COMPLETION_CODE_INIT);
+
+	return 0;
+}
+
+
+static void cpt_fill_inst(union otx_cpt_inst_s *inst,
+			  struct otx_cpt_info_buffer *info,
+			  struct otx_cpt_iq_cmd *cmd)
+{
+	inst->u[0] = 0x0;
+	inst->s.doneint = true;
+	inst->s.res_addr = (u64)info->comp_baddr;
+	inst->u[2] = 0x0;
+	inst->s.wq_ptr = 0;
+	inst->s.ei0 = cmd->cmd.u64;
+	inst->s.ei1 = cmd->dptr;
+	inst->s.ei2 = cmd->rptr;
+	inst->s.ei3 = cmd->cptr.u64;
+}
+
+/*
+ * On OcteonTX platform the parameter db_count is used as a count for ringing
+ * door bell.
The valid values for db_count are: + * 0 - 1 CPT instruction will be enqueued however CPT will not be informed + * 1 - 1 CPT instruction will be enqueued and CPT will be informed + */ +static void cpt_send_cmd(union otx_cpt_inst_s *cptinst, struct otx_cptvf *cptvf) +{ + struct otx_cpt_cmd_qinfo *qinfo = &cptvf->cqinfo; + struct otx_cpt_cmd_queue *queue; + struct otx_cpt_cmd_chunk *curr; + u8 *ent; + + queue = &qinfo->queue[0]; + /* + * cpt_send_cmd is currently called only from critical section + * therefore no locking is required for accessing instruction queue + */ + ent = &queue->qhead->head[queue->idx * OTX_CPT_INST_SIZE]; + memcpy(ent, (void *) cptinst, OTX_CPT_INST_SIZE); + + if (++queue->idx >= queue->qhead->size / 64) { + curr = queue->qhead; + + if (list_is_last(&curr->nextchunk, &queue->chead)) + queue->qhead = queue->base; + else + queue->qhead = list_next_entry(queue->qhead, nextchunk); + queue->idx = 0; + } + /* make sure all memory stores are done before ringing doorbell */ + smp_wmb(); + otx_cptvf_write_vq_doorbell(cptvf, 1); +} + +static int process_request(struct pci_dev *pdev, struct otx_cpt_req_info *req, + struct otx_cpt_pending_queue *pqueue, + struct otx_cptvf *cptvf) +{ + struct otx_cptvf_request *cpt_req = &req->req; + struct otx_cpt_pending_entry *pentry = NULL; + union otx_cpt_ctrl_info *ctrl = &req->ctrl; + struct otx_cpt_info_buffer *info = NULL; + union otx_cpt_res_s *result = NULL; + struct otx_cpt_iq_cmd iq_cmd; + union otx_cpt_inst_s cptinst; + int retry, ret = 0; + u8 resume_sender; + gfp_t gfp; + + gfp = (req->areq->flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : + GFP_ATOMIC; + ret = setup_sgio_list(pdev, &info, req, gfp); + if (unlikely(ret)) { + dev_err(&pdev->dev, "Setting up SG list failed"); + goto request_cleanup; + } + cpt_req->dlen = info->dlen; + + result = (union otx_cpt_res_s *) info->completion_addr; + result->s.compcode = COMPLETION_CODE_INIT; + + spin_lock_bh(&pqueue->lock); + pentry = get_free_pending_entry(pqueue, pqueue->qlen); + retry = CPT_PENTRY_TIMEOUT / CPT_PENTRY_STEP; + while (unlikely(!pentry) && retry--) { + spin_unlock_bh(&pqueue->lock); + udelay(CPT_PENTRY_STEP); + spin_lock_bh(&pqueue->lock); + pentry = get_free_pending_entry(pqueue, pqueue->qlen); + } + + if (unlikely(!pentry)) { + ret = -ENOSPC; + spin_unlock_bh(&pqueue->lock); + goto request_cleanup; + } + + /* + * Check if we are close to filling in entire pending queue, + * if so then tell the sender to stop/sleep by returning -EBUSY + * We do it only for context which can sleep (GFP_KERNEL) + */ + if (gfp == GFP_KERNEL && + pqueue->pending_count > (pqueue->qlen - CPT_IQ_STOP_MARGIN)) { + pentry->resume_sender = true; + } else + pentry->resume_sender = false; + resume_sender = pentry->resume_sender; + pqueue->pending_count++; + + pentry->completion_addr = info->completion_addr; + pentry->info = info; + pentry->callback = req->callback; + pentry->areq = req->areq; + pentry->busy = true; + info->pentry = pentry; + info->time_in = jiffies; + info->req = req; + + /* Fill in the command */ + iq_cmd.cmd.u64 = 0; + iq_cmd.cmd.s.opcode = cpu_to_be16(cpt_req->opcode.flags); + iq_cmd.cmd.s.param1 = cpu_to_be16(cpt_req->param1); + iq_cmd.cmd.s.param2 = cpu_to_be16(cpt_req->param2); + iq_cmd.cmd.s.dlen = cpu_to_be16(cpt_req->dlen); + + /* 64-bit swap for microcode data reads, not needed for addresses*/ + iq_cmd.cmd.u64 = cpu_to_be64(iq_cmd.cmd.u64); + iq_cmd.dptr = info->dptr_baddr; + iq_cmd.rptr = info->rptr_baddr; + iq_cmd.cptr.u64 = 0; + iq_cmd.cptr.s.grp = ctrl->s.grp; + + /* 
Fill in the CPT_INST_S type command for HW interpretation */ + cpt_fill_inst(&cptinst, info, &iq_cmd); + + /* Print debug info if enabled */ + otx_cpt_dump_sg_list(pdev, req); + pr_debug("Cpt_inst_s hexdump (%d bytes)\n", OTX_CPT_INST_SIZE); + print_hex_dump_debug("", 0, 16, 1, &cptinst, OTX_CPT_INST_SIZE, false); + pr_debug("Dptr hexdump (%d bytes)\n", cpt_req->dlen); + print_hex_dump_debug("", 0, 16, 1, info->in_buffer, + cpt_req->dlen, false); + + /* Send CPT command */ + cpt_send_cmd(&cptinst, cptvf); + + /* + * We allocate and prepare pending queue entry in critical section + * together with submitting CPT instruction to CPT instruction queue + * to make sure that order of CPT requests is the same in both + * pending and instruction queues + */ + spin_unlock_bh(&pqueue->lock); + + ret = resume_sender ? -EBUSY : -EINPROGRESS; + return ret; + +request_cleanup: + do_request_cleanup(pdev, info); + return ret; +} + +int otx_cpt_do_request(struct pci_dev *pdev, struct otx_cpt_req_info *req, + int cpu_num) +{ + struct otx_cptvf *cptvf = pci_get_drvdata(pdev); + + if (!otx_cpt_device_ready(cptvf)) { + dev_err(&pdev->dev, "CPT Device is not ready"); + return -ENODEV; + } + + if ((cptvf->vftype == OTX_CPT_SE_TYPES) && (!req->ctrl.s.se_req)) { + dev_err(&pdev->dev, "CPTVF-%d of SE TYPE got AE request", + cptvf->vfid); + return -EINVAL; + } else if ((cptvf->vftype == OTX_CPT_AE_TYPES) && + (req->ctrl.s.se_req)) { + dev_err(&pdev->dev, "CPTVF-%d of AE TYPE got SE request", + cptvf->vfid); + return -EINVAL; + } + + return process_request(pdev, req, &cptvf->pqinfo.queue[0], cptvf); +} + +static int cpt_process_ccode(struct pci_dev *pdev, + union otx_cpt_res_s *cpt_status, + struct otx_cpt_info_buffer *cpt_info, + struct otx_cpt_req_info *req, u32 *res_code) +{ + u8 ccode = cpt_status->s.compcode; + union otx_cpt_error_code ecode; + + ecode.u = be64_to_cpu(*((u64 *) cpt_info->out_buffer)); + switch (ccode) { + case CPT_COMP_E_FAULT: + dev_err(&pdev->dev, + "Request failed with DMA fault\n"); + otx_cpt_dump_sg_list(pdev, req); + break; + + case CPT_COMP_E_SWERR: + dev_err(&pdev->dev, + "Request failed with software error code %d\n", + ecode.s.ccode); + otx_cpt_dump_sg_list(pdev, req); + break; + + case CPT_COMP_E_HWERR: + dev_err(&pdev->dev, + "Request failed with hardware error\n"); + otx_cpt_dump_sg_list(pdev, req); + break; + + case COMPLETION_CODE_INIT: + /* check for timeout */ + if (time_after_eq(jiffies, cpt_info->time_in + + OTX_CPT_COMMAND_TIMEOUT * HZ)) + dev_warn(&pdev->dev, "Request timed out 0x%p", req); + else if (cpt_info->extra_time < OTX_CPT_TIME_IN_RESET_COUNT) { + cpt_info->time_in = jiffies; + cpt_info->extra_time++; + } + return 1; + + case CPT_COMP_E_GOOD: + /* Check microcode completion code */ + if (ecode.s.ccode) { + /* + * If requested hmac is truncated and ucode returns + * s/g write length error then we report success + * because ucode writes as many bytes of calculated + * hmac as available in gather buffer and reports + * s/g write length error if number of bytes in gather + * buffer is less than full hmac size. 
+ */ + if (req->is_trunc_hmac && + ecode.s.ccode == ERR_SCATTER_GATHER_WRITE_LENGTH) { + *res_code = 0; + break; + } + + dev_err(&pdev->dev, + "Request failed with software error code 0x%x\n", + ecode.s.ccode); + otx_cpt_dump_sg_list(pdev, req); + break; + } + + /* Request has been processed with success */ + *res_code = 0; + break; + + default: + dev_err(&pdev->dev, "Request returned invalid status\n"); + break; + } + + return 0; +} + +static inline void process_pending_queue(struct pci_dev *pdev, + struct otx_cpt_pending_queue *pqueue) +{ + void (*callback)(int status, void *arg1, void *arg2); + struct otx_cpt_pending_entry *resume_pentry = NULL; + struct otx_cpt_pending_entry *pentry = NULL; + struct otx_cpt_info_buffer *cpt_info = NULL; + union otx_cpt_res_s *cpt_status = NULL; + struct otx_cpt_req_info *req = NULL; + struct crypto_async_request *areq; + u32 res_code, resume_index; + + while (1) { + spin_lock_bh(&pqueue->lock); + pentry = &pqueue->head[pqueue->front]; + + if (WARN_ON(!pentry)) { + spin_unlock_bh(&pqueue->lock); + break; + } + + res_code = -EINVAL; + if (unlikely(!pentry->busy)) { + spin_unlock_bh(&pqueue->lock); + break; + } + + if (unlikely(!pentry->callback)) { + dev_err(&pdev->dev, "Callback NULL\n"); + goto process_pentry; + } + + cpt_info = pentry->info; + if (unlikely(!cpt_info)) { + dev_err(&pdev->dev, "Pending entry post arg NULL\n"); + goto process_pentry; + } + + req = cpt_info->req; + if (unlikely(!req)) { + dev_err(&pdev->dev, "Request NULL\n"); + goto process_pentry; + } + + cpt_status = (union otx_cpt_res_s *) pentry->completion_addr; + if (unlikely(!cpt_status)) { + dev_err(&pdev->dev, "Completion address NULL\n"); + goto process_pentry; + } + + if (cpt_process_ccode(pdev, cpt_status, cpt_info, req, + &res_code)) { + spin_unlock_bh(&pqueue->lock); + return; + } + cpt_info->pdev = pdev; + +process_pentry: + /* + * Check if we should inform sending side to resume + * We do it CPT_IQ_RESUME_MARGIN elements in advance before + * pending queue becomes empty + */ + resume_index = modulo_inc(pqueue->front, pqueue->qlen, + CPT_IQ_RESUME_MARGIN); + resume_pentry = &pqueue->head[resume_index]; + if (resume_pentry && + resume_pentry->resume_sender) { + resume_pentry->resume_sender = false; + callback = resume_pentry->callback; + areq = resume_pentry->areq; + + if (callback) { + spin_unlock_bh(&pqueue->lock); + + /* + * EINPROGRESS is an indication for sending + * side that it can resume sending requests + */ + callback(-EINPROGRESS, areq, cpt_info); + spin_lock_bh(&pqueue->lock); + } + } + + callback = pentry->callback; + areq = pentry->areq; + free_pentry(pentry); + + pqueue->pending_count--; + pqueue->front = modulo_inc(pqueue->front, pqueue->qlen, 1); + spin_unlock_bh(&pqueue->lock); + + /* + * Call callback after current pending entry has been + * processed, we don't do it if the callback pointer is + * invalid. + */ + if (callback) + callback(res_code, areq, cpt_info); + } +} + +void otx_cpt_post_process(struct otx_cptvf_wqe *wqe) +{ + process_pending_queue(wqe->cptvf->pdev, &wqe->cptvf->pqinfo.queue[0]); +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h new file mode 100644 index 000000000000..a4c9ff730b13 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h @@ -0,0 +1,227 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPTVF_REQUEST_MANAGER_H +#define __OTX_CPTVF_REQUEST_MANAGER_H + +#include +#include +#include +#include "otx_cpt_hw_types.h" + +/* + * Maximum total number of SG buffers is 100, we divide it equally + * between input and output + */ +#define OTX_CPT_MAX_SG_IN_CNT 50 +#define OTX_CPT_MAX_SG_OUT_CNT 50 + +/* DMA mode direct or SG */ +#define OTX_CPT_DMA_DIRECT_DIRECT 0 +#define OTX_CPT_DMA_GATHER_SCATTER 1 + +/* Context source CPTR or DPTR */ +#define OTX_CPT_FROM_CPTR 0 +#define OTX_CPT_FROM_DPTR 1 + +/* CPT instruction queue alignment */ +#define OTX_CPT_INST_Q_ALIGNMENT 128 +#define OTX_CPT_MAX_REQ_SIZE 65535 + +/* Default command timeout in seconds */ +#define OTX_CPT_COMMAND_TIMEOUT 4 +#define OTX_CPT_TIMER_HOLD 0x03F +#define OTX_CPT_COUNT_HOLD 32 +#define OTX_CPT_TIME_IN_RESET_COUNT 5 + +/* Minimum and maximum values for interrupt coalescing */ +#define OTX_CPT_COALESC_MIN_TIME_WAIT 0x0 +#define OTX_CPT_COALESC_MAX_TIME_WAIT ((1<<16)-1) +#define OTX_CPT_COALESC_MIN_NUM_WAIT 0x0 +#define OTX_CPT_COALESC_MAX_NUM_WAIT ((1<<20)-1) + +union otx_cpt_opcode_info { + u16 flags; + struct { + u8 major; + u8 minor; + } s; +}; + +struct otx_cptvf_request { + u32 param1; + u32 param2; + u16 dlen; + union otx_cpt_opcode_info opcode; +}; + +struct otx_cpt_buf_ptr { + u8 *vptr; + dma_addr_t dma_addr; + u16 size; +}; + +union otx_cpt_ctrl_info { + u32 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u32 reserved0:26; + u32 grp:3; /* Group bits */ + u32 dma_mode:2; /* DMA mode */ + u32 se_req:1; /* To SE core */ +#else + u32 se_req:1; /* To SE core */ + u32 dma_mode:2; /* DMA mode */ + u32 grp:3; /* Group bits */ + u32 reserved0:26; +#endif + } s; +}; + +/* + * CPT_INST_S software command definitions + * Words EI (0-3) + */ +union otx_cpt_iq_cmd_word0 { + u64 u64; + struct { + u16 opcode; + u16 param1; + u16 param2; + u16 dlen; + } s; +}; + +union otx_cpt_iq_cmd_word3 { + u64 u64; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 grp:3; + u64 cptr:61; +#else + u64 cptr:61; + u64 grp:3; +#endif + } s; +}; + +struct otx_cpt_iq_cmd { + union otx_cpt_iq_cmd_word0 cmd; + u64 dptr; + u64 rptr; + union otx_cpt_iq_cmd_word3 cptr; +}; + +struct otx_cpt_sglist_component { + union { + u64 len; + struct { + u16 len0; + u16 len1; + u16 len2; + u16 len3; + } s; + } u; + u64 ptr0; + u64 ptr1; + u64 ptr2; + u64 ptr3; +}; + +struct otx_cpt_pending_entry { + u64 *completion_addr; /* Completion address */ + struct otx_cpt_info_buffer *info; + /* Kernel async request callback */ + void (*callback)(int status, void *arg1, void *arg2); + struct crypto_async_request *areq; /* Async request callback arg */ + u8 resume_sender; /* Notify sender to resume sending requests */ + u8 busy; /* Entry status (free/busy) */ +}; + +struct otx_cpt_pending_queue { + struct otx_cpt_pending_entry *head; /* Head of the queue */ + u32 front; /* Process work from here */ + u32 rear; /* Append new work here */ + u32 pending_count; /* Pending requests count */ + u32 qlen; /* Queue length */ + spinlock_t lock; /* Queue lock */ +}; + +struct otx_cpt_req_info { + /* Kernel async request callback */ + void (*callback)(int status, void *arg1, void *arg2); + struct crypto_async_request *areq; /* Async request callback arg */ + struct otx_cptvf_request req;/* Request information (core specific) */ + union otx_cpt_ctrl_info ctrl;/* 
User control information */ + struct otx_cpt_buf_ptr in[OTX_CPT_MAX_SG_IN_CNT]; + struct otx_cpt_buf_ptr out[OTX_CPT_MAX_SG_OUT_CNT]; + u8 *iv_out; /* IV to send back */ + u16 rlen; /* Output length */ + u8 incnt; /* Number of input buffers */ + u8 outcnt; /* Number of output buffers */ + u8 req_type; /* Type of request */ + u8 is_enc; /* Is a request an encryption request */ + u8 is_trunc_hmac;/* Is truncated hmac used */ +}; + +struct otx_cpt_info_buffer { + struct otx_cpt_pending_entry *pentry; + struct otx_cpt_req_info *req; + struct pci_dev *pdev; + u64 *completion_addr; + u8 *out_buffer; + u8 *in_buffer; + dma_addr_t dptr_baddr; + dma_addr_t rptr_baddr; + dma_addr_t comp_baddr; + unsigned long time_in; + u32 dlen; + u32 dma_len; + u8 extra_time; +}; + +static inline void do_request_cleanup(struct pci_dev *pdev, + struct otx_cpt_info_buffer *info) +{ + struct otx_cpt_req_info *req; + int i; + + if (info->dptr_baddr) + dma_unmap_single(&pdev->dev, info->dptr_baddr, + info->dma_len, DMA_BIDIRECTIONAL); + + if (info->req) { + req = info->req; + for (i = 0; i < req->outcnt; i++) { + if (req->out[i].dma_addr) + dma_unmap_single(&pdev->dev, + req->out[i].dma_addr, + req->out[i].size, + DMA_BIDIRECTIONAL); + } + + for (i = 0; i < req->incnt; i++) { + if (req->in[i].dma_addr) + dma_unmap_single(&pdev->dev, + req->in[i].dma_addr, + req->in[i].size, + DMA_BIDIRECTIONAL); + } + } + kzfree(info); +} + +struct otx_cptvf_wqe; +void otx_cpt_dump_sg_list(struct pci_dev *pdev, struct otx_cpt_req_info *req); +void otx_cpt_post_process(struct otx_cptvf_wqe *wqe); +int otx_cpt_do_request(struct pci_dev *pdev, struct otx_cpt_req_info *req, + int cpu_num); + +#endif /* __OTX_CPTVF_REQUEST_MANAGER_H */ From 6482023b9d3350bf1b756ef36e1ea1a1c871879c Mon Sep 17 00:00:00 2001 From: SrujanaChalla Date: Fri, 13 Mar 2020 17:17:08 +0530 Subject: [PATCH 149/162] crypto: marvell - enable OcteonTX cpt options for build Add the OcteonTX cpt options in crypto Kconfig and Makefile Signed-off-by: SrujanaChalla Signed-off-by: Herbert Xu --- drivers/crypto/marvell/Kconfig | 16 ++++++++++++++++ drivers/crypto/marvell/Makefile | 1 + 2 files changed, 17 insertions(+) diff --git a/drivers/crypto/marvell/Kconfig b/drivers/crypto/marvell/Kconfig index 8262b14c6287..13063384f958 100644 --- a/drivers/crypto/marvell/Kconfig +++ b/drivers/crypto/marvell/Kconfig @@ -19,3 +19,19 @@ config CRYPTO_DEV_MARVELL_CESA Security Accelerator (CESA) which can be found on MVEBU and ORION platforms. This driver supports CPU offload through DMA transfers. + +config CRYPTO_DEV_OCTEONTX_CPT + tristate "Support for Marvell OcteonTX CPT driver" + depends on ARCH_THUNDER || COMPILE_TEST + depends on PCI_MSI && 64BIT + depends on CRYPTO_LIB_AES + select CRYPTO_SKCIPHER + select CRYPTO_HASH + select CRYPTO_AEAD + select CRYPTO_DEV_MARVELL + help + This driver allows you to utilize the Marvell Cryptographic + Accelerator Unit(CPT) found in OcteonTX series of processors. 
+ + To compile this driver as module, choose M here: + the modules will be called octeontx-cpt and octeontx-cptvf diff --git a/drivers/crypto/marvell/Makefile b/drivers/crypto/marvell/Makefile index 2030b0b55e91..6c6a1519b0f1 100644 --- a/drivers/crypto/marvell/Makefile +++ b/drivers/crypto/marvell/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += cesa/ +obj-$(CONFIG_CRYPTO_DEV_OCTEONTX_CPT) += octeontx/ From f6913d040c8d2d8294d0a06c9d2a2aa4a02fb8c0 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 14 Mar 2020 10:44:41 +0000 Subject: [PATCH 150/162] crypto: chelsio - remove set but not used variable 'adap' drivers/crypto/chelsio/chcr_algo.c: In function 'chcr_device_init': drivers/crypto/chelsio/chcr_algo.c:1440:18: warning: variable 'adap' set but not used [-Wunused-but-set-variable] commit 567be3a5d227 ("crypto: chelsio - Use multiple txq/rxq per tfm to process the requests") involved this unused variable. Signed-off-by: YueHaibing Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chcr_algo.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index 8952732c0b7d..c29b80dd30d8 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -1437,7 +1437,6 @@ static int chcr_aes_decrypt(struct skcipher_request *req) static int chcr_device_init(struct chcr_context *ctx) { struct uld_ctx *u_ctx = NULL; - struct adapter *adap; int txq_perchan, ntxq; int err = 0, rxq_perchan; @@ -1448,7 +1447,6 @@ static int chcr_device_init(struct chcr_context *ctx) goto out; } ctx->dev = &u_ctx->dev; - adap = padap(ctx->dev); ntxq = u_ctx->lldi.ntxq; rxq_perchan = u_ctx->lldi.nrxq / u_ctx->lldi.nchan; txq_perchan = ntxq / u_ctx->lldi.nchan; From 4ccff767914be253eddacc6d29ec1a8336284b00 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 18 Mar 2020 02:00:40 +0000 Subject: [PATCH 151/162] crypto: ccree - remove duplicated include from cc_aead.c Remove duplicated include. Signed-off-by: YueHaibing Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_aead.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index 875fa79a03eb..1cf51edbc4b9 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -9,7 +9,6 @@ #include #include #include -#include #include "cc_driver.h" #include "cc_buffer_mgr.h" #include "cc_aead.h" From f0ac02c791a178df807c0a079a2372b8d825652b Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 19 Mar 2020 09:12:25 -0700 Subject: [PATCH 152/162] crypto: caam - allocate RNG instantiation descriptor with GFP_DMA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Be consistent with the rest of the codebase and use GFP_DMA when allocating memory for a CAAM JR descriptor. 
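A minimal sketch of the allocation pattern being standardized here (illustrative only, not part of the patch; CAAM_CMD_SZ is the driver's per-command size and the helper name is invented):

/*
 * Job ring descriptors are fetched by the CAAM via DMA, so their
 * backing memory must come from a DMA-capable zone: allocate with
 * GFP_KERNEL | GFP_DMA instead of plain GFP_KERNEL.
 */
static u32 *caam_alloc_jr_desc(size_t ncmds)
{
	return kmalloc(CAAM_CMD_SZ * ncmds, GFP_KERNEL | GFP_DMA);
}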
Signed-off-by: Andrey Smirnov Reviewed-by: Horia Geantă Cc: Chris Healy Cc: Lucas Stach Cc: Horia Geantă Cc: Herbert Xu Cc: Iuliana Prodan Cc: linux-imx@nxp.com Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Herbert Xu --- drivers/crypto/caam/ctrl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 7139366da016..7f7f2960b0cc 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -196,7 +196,7 @@ static int deinstantiate_rng(struct device *ctrldev, int state_handle_mask) u32 *desc, status; int sh_idx, ret = 0; - desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL); + desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL | GFP_DMA); if (!desc) return -ENOMEM; @@ -273,7 +273,7 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask, int ret = 0, sh_idx; ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl; - desc = kmalloc(CAAM_CMD_SZ * 7, GFP_KERNEL); + desc = kmalloc(CAAM_CMD_SZ * 7, GFP_KERNEL | GFP_DMA); if (!desc) return -ENOMEM; From 8483c831b9f3fca2219ac45f361386a1233f6c9b Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 19 Mar 2020 09:12:26 -0700 Subject: [PATCH 153/162] crypto: caam - use struct hwrng's .init for initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make caamrng code a bit more symmetric by moving initialization code to .init hook of struct hwrng. Signed-off-by: Andrey Smirnov Reviewed-by: Horia Geantă Cc: Chris Healy Cc: Lucas Stach Cc: Horia Geantă Cc: Herbert Xu Cc: Iuliana Prodan Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-imx@nxp.com Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamrng.c | 47 ++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index 34cbb4a56f05..69a02ac5de54 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -256,6 +256,7 @@ static void caam_cleanup(struct hwrng *rng) } rng_unmap_ctx(rng_ctx); + caam_jr_free(rng_ctx->jrdev); } static int caam_init_buf(struct caam_rng_ctx *ctx, int buf_id) @@ -274,28 +275,43 @@ static int caam_init_buf(struct caam_rng_ctx *ctx, int buf_id) return 0; } -static int caam_init_rng(struct caam_rng_ctx *ctx, struct device *jrdev) +static int caam_init(struct hwrng *rng) { + struct caam_rng_ctx *ctx = rng_ctx; int err; - ctx->jrdev = jrdev; + ctx->jrdev = caam_jr_alloc(); + err = PTR_ERR_OR_ZERO(ctx->jrdev); + if (err) { + pr_err("Job Ring Device allocation for transform failed\n"); + return err; + } err = rng_create_sh_desc(ctx); if (err) - return err; + goto free_jrdev; ctx->current_buf = 0; ctx->cur_buf_idx = 0; err = caam_init_buf(ctx, 0); if (err) - return err; + goto free_jrdev; - return caam_init_buf(ctx, 1); + err = caam_init_buf(ctx, 1); + if (err) + goto free_jrdev; + + return 0; + +free_jrdev: + caam_jr_free(ctx->jrdev); + return err; } static struct hwrng caam_rng = { .name = "rng-caam", + .init = caam_init, .cleanup = caam_cleanup, .read = caam_read, }; @@ -305,14 +321,12 @@ void caam_rng_exit(void) if (!init_done) return; - caam_jr_free(rng_ctx->jrdev); hwrng_unregister(&caam_rng); kfree(rng_ctx); } int caam_rng_init(struct device *ctrldev) { - struct device *dev; u32 rng_inst; struct caam_drv_private *priv = dev_get_drvdata(ctrldev); int err; @@ -328,21 +342,11 @@ int caam_rng_init(struct device *ctrldev) if (!rng_inst) return 0; - dev = 
caam_jr_alloc(); - if (IS_ERR(dev)) { - pr_err("Job Ring Device allocation for transform failed\n"); - return PTR_ERR(dev); - } rng_ctx = kmalloc(sizeof(*rng_ctx), GFP_DMA | GFP_KERNEL); - if (!rng_ctx) { - err = -ENOMEM; - goto free_caam_alloc; - } - err = caam_init_rng(rng_ctx, dev); - if (err) - goto free_rng_ctx; + if (!rng_ctx) + return -ENOMEM; - dev_info(dev, "registering rng-caam\n"); + dev_info(ctrldev, "registering rng-caam\n"); err = hwrng_register(&caam_rng); if (!err) { @@ -350,9 +354,6 @@ int caam_rng_init(struct device *ctrldev) return err; } -free_rng_ctx: kfree(rng_ctx); -free_caam_alloc: - caam_jr_free(dev); return err; } From 1517f63cd84f00da3a3e21dff042410b2799c1c3 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 19 Mar 2020 09:12:27 -0700 Subject: [PATCH 154/162] crypto: caam - drop global context pointer and init_done MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Leverage devres to get rid of code storing global context as well as init_done flag. Original code also has a circular deallocation dependency where unregister_algs() -> caam_rng_exit() -> caam_jr_free() chain would only happen if all of JRs were freed. Fix this by moving caam_rng_exit() outside of unregister_algs() and doing it specifically for JR that instantiated HWRNG. Signed-off-by: Andrey Smirnov Cc: Chris Healy Cc: Lucas Stach Cc: Horia Geantă Cc: Herbert Xu Cc: Iuliana Prodan Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-imx@nxp.com Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamrng.c | 67 +++++++++++++++++------------------ drivers/crypto/caam/intern.h | 7 ++-- drivers/crypto/caam/jr.c | 13 ++++--- 3 files changed, 44 insertions(+), 43 deletions(-) diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index 69a02ac5de54..753625f2b2c0 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -70,6 +70,7 @@ struct buf_data { /* rng per-device context */ struct caam_rng_ctx { + struct hwrng rng; struct device *jrdev; dma_addr_t sh_desc_dma; u32 sh_desc[DESC_RNG_LEN]; @@ -78,13 +79,10 @@ struct caam_rng_ctx { struct buf_data bufs[2]; }; -static struct caam_rng_ctx *rng_ctx; - -/* - * Variable used to avoid double free of resources in case - * algorithm registration was unsuccessful - */ -static bool init_done; +static struct caam_rng_ctx *to_caam_rng_ctx(struct hwrng *r) +{ + return (struct caam_rng_ctx *)r->priv; +} static inline void rng_unmap_buf(struct device *jrdev, struct buf_data *bd) { @@ -143,7 +141,7 @@ static inline int submit_job(struct caam_rng_ctx *ctx, int to_current) static int caam_read(struct hwrng *rng, void *data, size_t max, bool wait) { - struct caam_rng_ctx *ctx = rng_ctx; + struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng); struct buf_data *bd = &ctx->bufs[ctx->current_buf]; int next_buf_idx, copied_idx; int err; @@ -246,17 +244,18 @@ static inline int rng_create_job_desc(struct caam_rng_ctx *ctx, int buf_id) static void caam_cleanup(struct hwrng *rng) { + struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng); int i; struct buf_data *bd; for (i = 0; i < 2; i++) { - bd = &rng_ctx->bufs[i]; + bd = &ctx->bufs[i]; if (atomic_read(&bd->empty) == BUF_PENDING) wait_for_completion(&bd->filled); } - rng_unmap_ctx(rng_ctx); - caam_jr_free(rng_ctx->jrdev); + rng_unmap_ctx(ctx); + caam_jr_free(ctx->jrdev); } static int caam_init_buf(struct caam_rng_ctx *ctx, int buf_id) @@ -277,7 +276,7 @@ static int caam_init_buf(struct caam_rng_ctx *ctx, int 
buf_id) static int caam_init(struct hwrng *rng) { - struct caam_rng_ctx *ctx = rng_ctx; + struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng); int err; ctx->jrdev = caam_jr_alloc(); @@ -309,28 +308,19 @@ free_jrdev: return err; } -static struct hwrng caam_rng = { - .name = "rng-caam", - .init = caam_init, - .cleanup = caam_cleanup, - .read = caam_read, -}; +int caam_rng_init(struct device *ctrldev); -void caam_rng_exit(void) +void caam_rng_exit(struct device *ctrldev) { - if (!init_done) - return; - - hwrng_unregister(&caam_rng); - kfree(rng_ctx); + devres_release_group(ctrldev, caam_rng_init); } int caam_rng_init(struct device *ctrldev) { + struct caam_rng_ctx *ctx; u32 rng_inst; struct caam_drv_private *priv = dev_get_drvdata(ctrldev); - int err; - init_done = false; + int ret; /* Check for an instantiated RNG before registration */ if (priv->era < 10) @@ -342,18 +332,27 @@ int caam_rng_init(struct device *ctrldev) if (!rng_inst) return 0; - rng_ctx = kmalloc(sizeof(*rng_ctx), GFP_DMA | GFP_KERNEL); - if (!rng_ctx) + if (!devres_open_group(ctrldev, caam_rng_init, GFP_KERNEL)) return -ENOMEM; + ctx = devm_kzalloc(ctrldev, sizeof(*ctx), GFP_DMA | GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->rng.name = "rng-caam"; + ctx->rng.init = caam_init; + ctx->rng.cleanup = caam_cleanup; + ctx->rng.read = caam_read; + ctx->rng.priv = (unsigned long)ctx; + dev_info(ctrldev, "registering rng-caam\n"); - err = hwrng_register(&caam_rng); - if (!err) { - init_done = true; - return err; + ret = devm_hwrng_register(ctrldev, &ctx->rng); + if (ret) { + caam_rng_exit(ctrldev); + return ret; } - kfree(rng_ctx); - return err; + devres_close_group(ctrldev, caam_rng_init); + return 0; } diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index 230ea88184f5..402d6a362e8c 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -47,6 +47,7 @@ struct caam_drv_private_jr { struct caam_job_ring __iomem *rregs; /* JobR's register space */ struct tasklet_struct irqtask; int irq; /* One per queue */ + bool hwrng; /* Number of scatterlist crypt transforms active on the JobR */ atomic_t tfm_count ____cacheline_aligned; @@ -163,7 +164,7 @@ static inline void caam_pkc_exit(void) #ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API int caam_rng_init(struct device *dev); -void caam_rng_exit(void); +void caam_rng_exit(struct device *dev); #else @@ -172,9 +173,7 @@ static inline int caam_rng_init(struct device *dev) return 0; } -static inline void caam_rng_exit(void) -{ -} +static inline void caam_rng_exit(struct device *dev) {} #endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API */ diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 88aff2aefd5d..4af22e7ceb4f 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -27,7 +27,8 @@ static struct jr_driver_data driver_data; static DEFINE_MUTEX(algs_lock); static unsigned int active_devs; -static void register_algs(struct device *dev) +static void register_algs(struct caam_drv_private_jr *jrpriv, + struct device *dev) { mutex_lock(&algs_lock); @@ -37,7 +38,7 @@ static void register_algs(struct device *dev) caam_algapi_init(dev); caam_algapi_hash_init(dev); caam_pkc_init(dev); - caam_rng_init(dev); + jrpriv->hwrng = !caam_rng_init(dev); caam_qi_algapi_init(dev); algs_unlock: @@ -53,7 +54,6 @@ static void unregister_algs(void) caam_qi_algapi_exit(); - caam_rng_exit(); caam_pkc_exit(); caam_algapi_hash_exit(); caam_algapi_exit(); @@ -135,6 +135,9 @@ static int caam_jr_remove(struct platform_device *pdev) jrdev = &pdev->dev; 
jrpriv = dev_get_drvdata(jrdev); + if (jrpriv->hwrng) + caam_rng_exit(jrdev->parent); + /* * Return EBUSY if job ring already allocated. */ @@ -514,7 +517,7 @@ static int caam_jr_probe(struct platform_device *pdev) int error; jrdev = &pdev->dev; - jrpriv = devm_kmalloc(jrdev, sizeof(*jrpriv), GFP_KERNEL); + jrpriv = devm_kzalloc(jrdev, sizeof(*jrpriv), GFP_KERNEL); if (!jrpriv) return -ENOMEM; @@ -590,7 +593,7 @@ static int caam_jr_probe(struct platform_device *pdev) atomic_set(&jrpriv->tfm_count, 0); - register_algs(jrdev->parent); + register_algs(jrpriv, jrdev->parent); return 0; } From 2c5e88dc90f50022d1b4bf56c9b45d4162757094 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 19 Mar 2020 09:12:28 -0700 Subject: [PATCH 155/162] crypto: caam - simplify RNG implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rework CAAM RNG implementation as follows: - Make use of the fact that HWRNG supports partial reads and will handle such cases gracefully by removing recursion in caam_read() - Convert blocking caam_read() codepath to do a single blocking job read directly into requested buffer, bypassing any intermediary buffers - Convert async caam_read() codepath into a simple single reader/single writer FIFO use-case, thus simplifying concurrency handling and delegating buffer read/write position management to KFIFO subsystem. - Leverage the same low level RNG data extraction code for both async and blocking caam_read() scenarios, get rid of the shared job descriptor and make non-shared one as a simple as possible (just HEADER + ALGORITHM OPERATION + FIFO STORE) - Split private context from DMA related memory, so that the former could be allocated without GFP_DMA. NOTE: On its face value this commit decreased throughput numbers reported by dd if=/dev/hwrng of=/dev/null bs=1 count=100K [iflag=nonblock] by about 15%, however commits that enable prediction resistance and limit JR total size impact the performance so much and move the bottleneck such as to make this regression irrelevant. NOTE: On the bright side, this commit reduces RNG in kernel DMA buffer memory usage from 2 x RN_BUF_SIZE (~256K) to 32K. Signed-off-by: Andrey Smirnov Reviewed-by: Horia Geantă Cc: Chris Healy Cc: Lucas Stach Cc: Horia Geantă Cc: Herbert Xu Cc: Iuliana Prodan Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-imx@nxp.com Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamrng.c | 350 ++++++++++++---------------------- 1 file changed, 121 insertions(+), 229 deletions(-) diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index 753625f2b2c0..e64f2d77fa03 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -7,35 +7,12 @@ * * Based on caamalg.c crypto API driver. 
* - * relationship between job descriptors to shared descriptors: - * - * --------------- -------------- - * | JobDesc #0 |-------------------->| ShareDesc | - * | *(buffer 0) | |------------->| (generate) | - * --------------- | | (move) | - * | | (store) | - * --------------- | -------------- - * | JobDesc #1 |------| - * | *(buffer 1) | - * --------------- - * - * A job desc looks like this: - * - * --------------------- - * | Header | - * | ShareDesc Pointer | - * | SEQ_OUT_PTR | - * | (output buffer) | - * --------------------- - * - * The SharedDesc never changes, and each job descriptor points to one of two - * buffers for each device, from which the data will be copied into the - * requested destination */ #include #include #include +#include #include "compat.h" @@ -45,38 +22,26 @@ #include "jr.h" #include "error.h" +#define CAAM_RNG_MAX_FIFO_STORE_SIZE U16_MAX + +#define CAAM_RNG_FIFO_LEN SZ_32K /* Must be a multiple of 2 */ + /* - * Maximum buffer size: maximum number of random, cache-aligned bytes that - * will be generated and moved to seq out ptr (extlen not allowed) + * Length of used descriptors, see caam_init_desc() */ -#define RN_BUF_SIZE (0xffff / L1_CACHE_BYTES * \ - L1_CACHE_BYTES) - -/* length of descriptors */ -#define DESC_JOB_O_LEN (CAAM_CMD_SZ * 2 + CAAM_PTR_SZ_MAX * 2) -#define DESC_RNG_LEN (3 * CAAM_CMD_SZ) - -/* Buffer, its dma address and lock */ -struct buf_data { - u8 buf[RN_BUF_SIZE] ____cacheline_aligned; - dma_addr_t addr; - struct completion filled; - u32 hw_desc[DESC_JOB_O_LEN]; -#define BUF_NOT_EMPTY 0 -#define BUF_EMPTY 1 -#define BUF_PENDING 2 /* Empty, but with job pending --don't submit another */ - atomic_t empty; -}; +#define CAAM_RNG_DESC_LEN (CAAM_CMD_SZ + \ + CAAM_CMD_SZ + \ + CAAM_CMD_SZ + CAAM_PTR_SZ_MAX) /* rng per-device context */ struct caam_rng_ctx { struct hwrng rng; struct device *jrdev; - dma_addr_t sh_desc_dma; - u32 sh_desc[DESC_RNG_LEN]; - unsigned int cur_buf_idx; - int current_buf; - struct buf_data bufs[2]; + struct device *ctrldev; + void *desc_async; + void *desc_sync; + struct work_struct worker; + struct kfifo fifo; }; static struct caam_rng_ctx *to_caam_rng_ctx(struct hwrng *r) @@ -84,194 +49,116 @@ static struct caam_rng_ctx *to_caam_rng_ctx(struct hwrng *r) return (struct caam_rng_ctx *)r->priv; } -static inline void rng_unmap_buf(struct device *jrdev, struct buf_data *bd) +static void caam_rng_done(struct device *jrdev, u32 *desc, u32 err, + void *context) { - if (bd->addr) - dma_unmap_single(jrdev, bd->addr, RN_BUF_SIZE, - DMA_FROM_DEVICE); -} - -static inline void rng_unmap_ctx(struct caam_rng_ctx *ctx) -{ - struct device *jrdev = ctx->jrdev; - - if (ctx->sh_desc_dma) - dma_unmap_single(jrdev, ctx->sh_desc_dma, - desc_bytes(ctx->sh_desc), DMA_TO_DEVICE); - rng_unmap_buf(jrdev, &ctx->bufs[0]); - rng_unmap_buf(jrdev, &ctx->bufs[1]); -} - -static void rng_done(struct device *jrdev, u32 *desc, u32 err, void *context) -{ - struct buf_data *bd; - - bd = container_of(desc, struct buf_data, hw_desc[0]); + struct completion *done = context; if (err) caam_jr_strstatus(jrdev, err); - atomic_set(&bd->empty, BUF_NOT_EMPTY); - complete(&bd->filled); - - /* Buffer refilled, invalidate cache */ - dma_sync_single_for_cpu(jrdev, bd->addr, RN_BUF_SIZE, DMA_FROM_DEVICE); - - print_hex_dump_debug("rng refreshed buf@: ", DUMP_PREFIX_ADDRESS, 16, 4, - bd->buf, RN_BUF_SIZE, 1); + complete(done); } -static inline int submit_job(struct caam_rng_ctx *ctx, int to_current) +static u32 *caam_init_desc(u32 *desc, dma_addr_t dst_dma, int len) { - 
struct buf_data *bd = &ctx->bufs[!(to_current ^ ctx->current_buf)]; - struct device *jrdev = ctx->jrdev; - u32 *desc = bd->hw_desc; + init_job_desc(desc, 0); /* + 1 cmd_sz */ + /* Generate random bytes: + 1 cmd_sz */ + append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG); + /* Store bytes: + 1 cmd_sz + caam_ptr_sz */ + append_fifo_store(desc, dst_dma, len, FIFOST_TYPE_RNGSTORE); + + print_hex_dump_debug("rng job desc@: ", DUMP_PREFIX_ADDRESS, + 16, 4, desc, desc_bytes(desc), 1); + + return desc; +} + +static int caam_rng_read_one(struct device *jrdev, + void *dst, int len, + void *desc, + struct completion *done) +{ + dma_addr_t dst_dma; int err; - dev_dbg(jrdev, "submitting job %d\n", !(to_current ^ ctx->current_buf)); - init_completion(&bd->filled); - err = caam_jr_enqueue(jrdev, desc, rng_done, ctx); - if (err != -EINPROGRESS) - complete(&bd->filled); /* don't wait on failed job*/ - else - atomic_inc(&bd->empty); /* note if pending */ + len = min_t(int, len, CAAM_RNG_MAX_FIFO_STORE_SIZE); - return err; + dst_dma = dma_map_single(jrdev, dst, len, DMA_FROM_DEVICE); + if (dma_mapping_error(jrdev, dst_dma)) { + dev_err(jrdev, "unable to map destination memory\n"); + return -ENOMEM; + } + + init_completion(done); + err = caam_jr_enqueue(jrdev, + caam_init_desc(desc, dst_dma, len), + caam_rng_done, done); + if (err == -EINPROGRESS) { + wait_for_completion(done); + err = 0; + } + + dma_unmap_single(jrdev, dst_dma, len, DMA_FROM_DEVICE); + + return err ?: len; } -static int caam_read(struct hwrng *rng, void *data, size_t max, bool wait) +static void caam_rng_fill_async(struct caam_rng_ctx *ctx) +{ + struct scatterlist sg[1]; + struct completion done; + int len, nents; + + sg_init_table(sg, ARRAY_SIZE(sg)); + nents = kfifo_dma_in_prepare(&ctx->fifo, sg, ARRAY_SIZE(sg), + CAAM_RNG_FIFO_LEN); + if (!nents) + return; + + len = caam_rng_read_one(ctx->jrdev, sg_virt(&sg[0]), + sg[0].length, + ctx->desc_async, + &done); + if (len < 0) + return; + + kfifo_dma_in_finish(&ctx->fifo, len); +} + +static void caam_rng_worker(struct work_struct *work) +{ + struct caam_rng_ctx *ctx = container_of(work, struct caam_rng_ctx, + worker); + caam_rng_fill_async(ctx); +} + +static int caam_read(struct hwrng *rng, void *dst, size_t max, bool wait) { struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng); - struct buf_data *bd = &ctx->bufs[ctx->current_buf]; - int next_buf_idx, copied_idx; - int err; + int out; - if (atomic_read(&bd->empty)) { - /* try to submit job if there wasn't one */ - if (atomic_read(&bd->empty) == BUF_EMPTY) { - err = submit_job(ctx, 1); - /* if can't submit job, can't even wait */ - if (err != -EINPROGRESS) - return 0; - } - /* no immediate data, so exit if not waiting */ - if (!wait) - return 0; + if (wait) { + struct completion done; - /* waiting for pending job */ - if (atomic_read(&bd->empty)) - wait_for_completion(&bd->filled); + return caam_rng_read_one(ctx->jrdev, dst, max, + ctx->desc_sync, &done); } - next_buf_idx = ctx->cur_buf_idx + max; - dev_dbg(ctx->jrdev, "%s: start reading at buffer %d, idx %d\n", - __func__, ctx->current_buf, ctx->cur_buf_idx); + out = kfifo_out(&ctx->fifo, dst, max); + if (kfifo_len(&ctx->fifo) <= CAAM_RNG_FIFO_LEN / 2) + schedule_work(&ctx->worker); - /* if enough data in current buffer */ - if (next_buf_idx < RN_BUF_SIZE) { - memcpy(data, bd->buf + ctx->cur_buf_idx, max); - ctx->cur_buf_idx = next_buf_idx; - return max; - } - - /* else, copy what's left... 
*/ - copied_idx = RN_BUF_SIZE - ctx->cur_buf_idx; - memcpy(data, bd->buf + ctx->cur_buf_idx, copied_idx); - ctx->cur_buf_idx = 0; - atomic_set(&bd->empty, BUF_EMPTY); - - /* ...refill... */ - submit_job(ctx, 1); - - /* and use next buffer */ - ctx->current_buf = !ctx->current_buf; - dev_dbg(ctx->jrdev, "switched to buffer %d\n", ctx->current_buf); - - /* since there already is some data read, don't wait */ - return copied_idx + caam_read(rng, data + copied_idx, - max - copied_idx, false); -} - -static inline int rng_create_sh_desc(struct caam_rng_ctx *ctx) -{ - struct device *jrdev = ctx->jrdev; - u32 *desc = ctx->sh_desc; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Generate random bytes */ - append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG); - - /* Store bytes */ - append_seq_fifo_store(desc, RN_BUF_SIZE, FIFOST_TYPE_RNGSTORE); - - ctx->sh_desc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } - - print_hex_dump_debug("rng shdesc@: ", DUMP_PREFIX_ADDRESS, 16, 4, - desc, desc_bytes(desc), 1); - - return 0; -} - -static inline int rng_create_job_desc(struct caam_rng_ctx *ctx, int buf_id) -{ - struct device *jrdev = ctx->jrdev; - struct buf_data *bd = &ctx->bufs[buf_id]; - u32 *desc = bd->hw_desc; - int sh_len = desc_len(ctx->sh_desc); - - init_job_desc_shared(desc, ctx->sh_desc_dma, sh_len, HDR_SHARE_DEFER | - HDR_REVERSE); - - bd->addr = dma_map_single(jrdev, bd->buf, RN_BUF_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(jrdev, bd->addr)) { - dev_err(jrdev, "unable to map dst\n"); - return -ENOMEM; - } - - append_seq_out_ptr_intlen(desc, bd->addr, RN_BUF_SIZE, 0); - - print_hex_dump_debug("rng job desc@: ", DUMP_PREFIX_ADDRESS, 16, 4, - desc, desc_bytes(desc), 1); - - return 0; + return out; } static void caam_cleanup(struct hwrng *rng) { struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng); - int i; - struct buf_data *bd; - for (i = 0; i < 2; i++) { - bd = &ctx->bufs[i]; - if (atomic_read(&bd->empty) == BUF_PENDING) - wait_for_completion(&bd->filled); - } - - rng_unmap_ctx(ctx); + flush_work(&ctx->worker); caam_jr_free(ctx->jrdev); -} - -static int caam_init_buf(struct caam_rng_ctx *ctx, int buf_id) -{ - struct buf_data *bd = &ctx->bufs[buf_id]; - int err; - - err = rng_create_job_desc(ctx, buf_id); - if (err) - return err; - - atomic_set(&bd->empty, BUF_EMPTY); - submit_job(ctx, buf_id == ctx->current_buf); - wait_for_completion(&bd->filled); - - return 0; + kfifo_free(&ctx->fifo); } static int caam_init(struct hwrng *rng) @@ -279,33 +166,36 @@ static int caam_init(struct hwrng *rng) struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng); int err; + ctx->desc_sync = devm_kzalloc(ctx->ctrldev, CAAM_RNG_DESC_LEN, + GFP_DMA | GFP_KERNEL); + if (!ctx->desc_sync) + return -ENOMEM; + + ctx->desc_async = devm_kzalloc(ctx->ctrldev, CAAM_RNG_DESC_LEN, + GFP_DMA | GFP_KERNEL); + if (!ctx->desc_async) + return -ENOMEM; + + if (kfifo_alloc(&ctx->fifo, CAAM_RNG_FIFO_LEN, GFP_DMA | GFP_KERNEL)) + return -ENOMEM; + + INIT_WORK(&ctx->worker, caam_rng_worker); + ctx->jrdev = caam_jr_alloc(); err = PTR_ERR_OR_ZERO(ctx->jrdev); if (err) { + kfifo_free(&ctx->fifo); pr_err("Job Ring Device allocation for transform failed\n"); return err; } - err = rng_create_sh_desc(ctx); - if (err) - goto free_jrdev; - - ctx->current_buf = 0; - ctx->cur_buf_idx = 0; - - err = caam_init_buf(ctx, 0); - if (err) - goto free_jrdev; - - err = caam_init_buf(ctx, 1); - if (err) 
- goto free_jrdev; + /* + * Fill async buffer to have early randomness data for + * hw_random + */ + caam_rng_fill_async(ctx); return 0; - -free_jrdev: - caam_jr_free(ctx->jrdev); - return err; } int caam_rng_init(struct device *ctrldev); @@ -335,10 +225,12 @@ int caam_rng_init(struct device *ctrldev) if (!devres_open_group(ctrldev, caam_rng_init, GFP_KERNEL)) return -ENOMEM; - ctx = devm_kzalloc(ctrldev, sizeof(*ctx), GFP_DMA | GFP_KERNEL); + ctx = devm_kzalloc(ctrldev, sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; + ctx->ctrldev = ctrldev; + ctx->rng.name = "rng-caam"; ctx->rng.init = caam_init; ctx->rng.cleanup = caam_cleanup; From 32107e43b505de44ebe1917da2c8c6229acbd509 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 19 Mar 2020 09:12:29 -0700 Subject: [PATCH 156/162] crypto: caam - check if RNG job failed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We shouldn't stay silent if RNG job fails. Add appropriate code to check for that case and propagate error code up appropriately. Signed-off-by: Andrey Smirnov Reviewed-by: Horia Geantă Cc: Chris Healy Cc: Lucas Stach Cc: Horia Geantă Cc: Herbert Xu Cc: Iuliana Prodan Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-imx@nxp.com Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamrng.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index e64f2d77fa03..d0027d31e840 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -44,6 +44,11 @@ struct caam_rng_ctx { struct kfifo fifo; }; +struct caam_rng_job_ctx { + struct completion *done; + int *err; +}; + static struct caam_rng_ctx *to_caam_rng_ctx(struct hwrng *r) { return (struct caam_rng_ctx *)r->priv; @@ -52,12 +57,12 @@ static struct caam_rng_ctx *to_caam_rng_ctx(struct hwrng *r) static void caam_rng_done(struct device *jrdev, u32 *desc, u32 err, void *context) { - struct completion *done = context; + struct caam_rng_job_ctx *jctx = context; if (err) - caam_jr_strstatus(jrdev, err); + *jctx->err = caam_jr_strstatus(jrdev, err); - complete(done); + complete(jctx->done); } static u32 *caam_init_desc(u32 *desc, dma_addr_t dst_dma, int len) @@ -80,7 +85,11 @@ static int caam_rng_read_one(struct device *jrdev, struct completion *done) { dma_addr_t dst_dma; - int err; + int err, ret = 0; + struct caam_rng_job_ctx jctx = { + .done = done, + .err = &ret, + }; len = min_t(int, len, CAAM_RNG_MAX_FIFO_STORE_SIZE); @@ -93,7 +102,7 @@ static int caam_rng_read_one(struct device *jrdev, init_completion(done); err = caam_jr_enqueue(jrdev, caam_init_desc(desc, dst_dma, len), - caam_rng_done, done); + caam_rng_done, &jctx); if (err == -EINPROGRESS) { wait_for_completion(done); err = 0; @@ -101,7 +110,7 @@ static int caam_rng_read_one(struct device *jrdev, dma_unmap_single(jrdev, dst_dma, len, DMA_FROM_DEVICE); - return err ?: len; + return err ?: (ret ?: len); } static void caam_rng_fill_async(struct caam_rng_ctx *ctx) From 551ce72a78e2c5493fa987410437e54b5f3fdd34 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 19 Mar 2020 09:12:30 -0700 Subject: [PATCH 157/162] crypto: caam - invalidate entropy register during RNG initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to make sure that we always use non-stale entropy data, change the code to invalidate entropy register during RNG initialization. 
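For readers following the diff: clrsetbits_32(addr, clear, set) is a read-modify-write register helper. A simplified sketch of its effective semantics (assumed; the driver's real accessors also handle register endianness):

/*
 * Clear the 'clear' bits and raise the 'set' bits in a single
 * 32-bit register update. The patch uses it to set
 * RTMCTL_PRGM | RTMCTL_ACC together (program mode plus TRNG
 * access, which invalidates stale entropy) and later to clear
 * both bits when putting RNG4 back into run mode.
 */
static inline void clrsetbits_32(void __iomem *addr, u32 clear, u32 set)
{
	iowrite32((ioread32(addr) & ~clear) | set, addr);
}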
Signed-off-by: Aymen Sghaier Signed-off-by: Vipul Kumar [andrew.smirnov@gmail.com ported to upstream kernel, rewrote commit msg] Signed-off-by: Andrey Smirnov Reviewed-by: Horia Geantă Cc: Chris Healy Cc: Lucas Stach Cc: Horia Geantă Cc: Herbert Xu Cc: Iuliana Prodan Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-imx@nxp.com Signed-off-by: Herbert Xu --- drivers/crypto/caam/ctrl.c | 11 ++++++++--- drivers/crypto/caam/regs.h | 3 ++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 7f7f2960b0cc..b278471f4013 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -341,8 +341,12 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl; r4tst = &ctrl->r4tst[0]; - /* put RNG4 into program mode */ - clrsetbits_32(&r4tst->rtmctl, 0, RTMCTL_PRGM); + /* + * Setting both RTMCTL:PRGM and RTMCTL:TRNG_ACC causes TRNG to + * properly invalidate the entropy in the entropy register and + * force re-generation. + */ + clrsetbits_32(&r4tst->rtmctl, 0, RTMCTL_PRGM | RTMCTL_ACC); /* * Performance-wise, it does not make sense to @@ -372,7 +376,8 @@ start_rng: * select raw sampling in both entropy shifter * and statistical checker; ; put RNG4 into run mode */ - clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, RTMCTL_SAMP_MODE_RAW_ES_SC); + clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM | RTMCTL_ACC, + RTMCTL_SAMP_MODE_RAW_ES_SC); } static int caam_get_era_from_hw(struct caam_ctrl __iomem *ctrl) diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 05127b70527d..c191e8fd0fa7 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -487,7 +487,8 @@ struct rngtst { /* RNG4 TRNG test registers */ struct rng4tst { -#define RTMCTL_PRGM 0x00010000 /* 1 -> program mode, 0 -> run mode */ +#define RTMCTL_ACC BIT(5) /* TRNG access mode */ +#define RTMCTL_PRGM BIT(16) /* 1 -> program mode, 0 -> run mode */ #define RTMCTL_SAMP_MODE_VON_NEUMANN_ES_SC 0 /* use von Neumann data in both entropy shifter and statistical checker */ From 0544cb75bd7df357c1758b760ecc7709125c139a Mon Sep 17 00:00:00 2001 From: Andrei Botila Date: Thu, 19 Mar 2020 09:12:31 -0700 Subject: [PATCH 158/162] bus: fsl-mc: add api to retrieve mc version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new api that returns Management Complex firmware version and make the required structure public. The api's first user will be the caam driver for setting prediction resistance bits. 
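A sketch of the intended consumer-side usage (this mirrors how the caam driver calls the new API in a following patch; the surrounding error handling is assumed):

/*
 * fsl_mc_get_version() returns NULL until the fsl-mc bus has
 * probed, so a consumer that needs the firmware version should
 * defer its own probe rather than fail outright.
 */
struct fsl_mc_version *mc_ver = fsl_mc_get_version();

if (!mc_ver)
	return -EPROBE_DEFER;

pr_info("MC firmware %u.%u.%u\n",
	mc_ver->major, mc_ver->minor, mc_ver->revision);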
Signed-off-by: Andrei Botila Acked-by: Laurentiu Tudor Reviewed-by: Horia Geantă Cc: Chris Healy Cc: Lucas Stach Cc: Horia Geantă Cc: Herbert Xu Cc: Iuliana Prodan Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-imx@nxp.com Signed-off-by: Herbert Xu --- drivers/bus/fsl-mc/fsl-mc-bus.c | 33 +++++++++++++++++---------------- include/linux/fsl/mc.h | 16 ++++++++++++++++ 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index c78d10ea641f..40526da5c6a6 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -26,6 +26,8 @@ */ #define FSL_MC_DEFAULT_DMA_MASK (~0ULL) +static struct fsl_mc_version mc_version; + /** * struct fsl_mc - Private data of a "fsl,qoriq-mc" platform device * @root_mc_bus_dev: fsl-mc device representing the root DPRC @@ -54,20 +56,6 @@ struct fsl_mc_addr_translation_range { phys_addr_t start_phys_addr; }; -/** - * struct mc_version - * @major: Major version number: incremented on API compatibility changes - * @minor: Minor version number: incremented on API additions (that are - * backward compatible); reset when major version is incremented - * @revision: Internal revision number: incremented on implementation changes - * and/or bug fixes that have no impact on API - */ -struct mc_version { - u32 major; - u32 minor; - u32 revision; -}; - /** * fsl_mc_bus_match - device to driver matching callback * @dev: the fsl-mc device to match against @@ -338,7 +326,7 @@ EXPORT_SYMBOL_GPL(fsl_mc_driver_unregister); */ static int mc_get_version(struct fsl_mc_io *mc_io, u32 cmd_flags, - struct mc_version *mc_ver_info) + struct fsl_mc_version *mc_ver_info) { struct fsl_mc_command cmd = { 0 }; struct dpmng_rsp_get_version *rsp_params; @@ -363,6 +351,20 @@ static int mc_get_version(struct fsl_mc_io *mc_io, return 0; } +/** + * fsl_mc_get_version - function to retrieve the MC f/w version information + * + * Return: mc version when called after fsl-mc-bus probe; NULL otherwise. 
+ */ +struct fsl_mc_version *fsl_mc_get_version(void) +{ + if (mc_version.major) + return &mc_version; + + return NULL; +} +EXPORT_SYMBOL_GPL(fsl_mc_get_version); + /** * fsl_mc_get_root_dprc - function to traverse to the root dprc */ @@ -862,7 +864,6 @@ static int fsl_mc_bus_probe(struct platform_device *pdev) int container_id; phys_addr_t mc_portal_phys_addr; u32 mc_portal_size; - struct mc_version mc_version; struct resource res; mc = devm_kzalloc(&pdev->dev, sizeof(*mc), GFP_KERNEL); diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 54d9436600c7..2b5f8366dbe1 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -381,6 +381,22 @@ int __must_check __fsl_mc_driver_register(struct fsl_mc_driver *fsl_mc_driver, void fsl_mc_driver_unregister(struct fsl_mc_driver *driver); +/** + * struct fsl_mc_version + * @major: Major version number: incremented on API compatibility changes + * @minor: Minor version number: incremented on API additions (that are + * backward compatible); reset when major version is incremented + * @revision: Internal revision number: incremented on implementation changes + * and/or bug fixes that have no impact on API + */ +struct fsl_mc_version { + u32 major; + u32 minor; + u32 revision; +}; + +struct fsl_mc_version *fsl_mc_get_version(void); + int __must_check fsl_mc_portal_allocate(struct fsl_mc_device *mc_dev, u16 mc_io_flags, struct fsl_mc_io **new_mc_io); From 358ba762d9f1d4ba99ab31ef12bc28014b22f4c9 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 19 Mar 2020 09:12:32 -0700 Subject: [PATCH 159/162] crypto: caam - enable prediction resistance in HRWNG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instantiate CAAM RNG with prediction resistance enabled to improve its quality (with PR on DRNG is forced to reseed from TRNG every time random data is generated). Management Complex firmware with version lower than 10.20.0 doesn't provide prediction resistance support. Consider this and only instantiate rng when mc f/w version is lower. Signed-off-by: Andrey Smirnov Signed-off-by: Andrei Botila Cc: Chris Healy Cc: Lucas Stach Cc: Horia Geantă Cc: Herbert Xu Cc: Iuliana Prodan Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-imx@nxp.com Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/Kconfig | 1 + drivers/crypto/caam/caamrng.c | 3 +- drivers/crypto/caam/ctrl.c | 73 ++++++++++++++++++++++++++++------- drivers/crypto/caam/desc.h | 2 + drivers/crypto/caam/regs.h | 4 +- 5 files changed, 68 insertions(+), 15 deletions(-) diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index 64f82263b20e..a62f228be6da 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -13,6 +13,7 @@ config CRYPTO_DEV_FSL_CAAM depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE select SOC_BUS select CRYPTO_DEV_FSL_CAAM_COMMON + imply FSL_MC_BUS help Enables the driver module for Freescale's Cryptographic Accelerator and Assurance Module (CAAM), also known as the SEC version 4 (SEC4). 
diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index d0027d31e840..988bfddbadc6 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -69,7 +69,8 @@ static u32 *caam_init_desc(u32 *desc, dma_addr_t dst_dma, int len) { init_job_desc(desc, 0); /* + 1 cmd_sz */ /* Generate random bytes: + 1 cmd_sz */ - append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG); + append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG | + OP_ALG_PR_ON); /* Store bytes: + 1 cmd_sz + caam_ptr_sz */ append_fifo_store(desc, dst_dma, len, FIFOST_TYPE_RNGSTORE); diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index b278471f4013..4fcdd262e581 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "compat.h" #include "regs.h" @@ -36,7 +37,8 @@ static void build_instantiation_desc(u32 *desc, int handle, int do_sk) init_job_desc(desc, 0); op_flags = OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG | - (handle << OP_ALG_AAI_SHIFT) | OP_ALG_AS_INIT; + (handle << OP_ALG_AAI_SHIFT) | OP_ALG_AS_INIT | + OP_ALG_PR_ON; /* INIT RNG in non-test mode */ append_operation(desc, op_flags); @@ -278,12 +280,25 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask, return -ENOMEM; for (sh_idx = 0; sh_idx < RNG4_MAX_HANDLES; sh_idx++) { + const u32 rdsta_if = RDSTA_IF0 << sh_idx; + const u32 rdsta_pr = RDSTA_PR0 << sh_idx; + const u32 rdsta_mask = rdsta_if | rdsta_pr; /* * If the corresponding bit is set, this state handle * was initialized by somebody else, so it's left alone. */ - if ((1 << sh_idx) & state_handle_mask) - continue; + if (rdsta_if & state_handle_mask) { + if (rdsta_pr & state_handle_mask) + continue; + + dev_info(ctrldev, + "RNG4 SH%d was previously instantiated without prediction resistance. 
Tearing it down\n", + sh_idx); + + ret = deinstantiate_rng(ctrldev, rdsta_if); + if (ret) + break; + } /* Create the descriptor for instantiating RNG State Handle */ build_instantiation_desc(desc, sh_idx, gen_sk); @@ -303,9 +318,9 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask, if (ret) break; - rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK; + rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_MASK; if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) || - !(rdsta_val & (1 << sh_idx))) { + (rdsta_val & rdsta_mask) != rdsta_mask) { ret = -EAGAIN; break; } @@ -564,6 +579,26 @@ static void caam_remove_debugfs(void *root) } #endif +#ifdef CONFIG_FSL_MC_BUS +static bool check_version(struct fsl_mc_version *mc_version, u32 major, + u32 minor, u32 revision) +{ + if (mc_version->major > major) + return true; + + if (mc_version->major == major) { + if (mc_version->minor > minor) + return true; + + if (mc_version->minor == minor && + mc_version->revision > revision) + return true; + } + + return false; +} +#endif + /* Probe routine for CAAM top (controller) level */ static int caam_probe(struct platform_device *pdev) { @@ -582,6 +617,7 @@ static int caam_probe(struct platform_device *pdev) u8 rng_vid; int pg_size; int BLOCK_OFFSET = 0; + bool pr_support = false; ctrlpriv = devm_kzalloc(&pdev->dev, sizeof(*ctrlpriv), GFP_KERNEL); if (!ctrlpriv) @@ -667,6 +703,21 @@ static int caam_probe(struct platform_device *pdev) /* Get the IRQ of the controller (for security violations only) */ ctrlpriv->secvio_irq = irq_of_parse_and_map(nprop, 0); + np = of_find_compatible_node(NULL, NULL, "fsl,qoriq-mc"); + ctrlpriv->mc_en = !!np; + of_node_put(np); + +#ifdef CONFIG_FSL_MC_BUS + if (ctrlpriv->mc_en) { + struct fsl_mc_version *mc_version; + + mc_version = fsl_mc_get_version(); + if (mc_version) + pr_support = check_version(mc_version, 10, 20, 0); + else + return -EPROBE_DEFER; + } +#endif /* * Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel, @@ -674,10 +725,6 @@ static int caam_probe(struct platform_device *pdev) * In case of SoCs with Management Complex, MC f/w performs * the configuration. */ - np = of_find_compatible_node(NULL, NULL, "fsl,qoriq-mc"); - ctrlpriv->mc_en = !!np; - of_node_put(np); - if (!ctrlpriv->mc_en) clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK, MCFGR_AWCACHE_CACH | MCFGR_AWCACHE_BUFF | @@ -784,7 +831,7 @@ static int caam_probe(struct platform_device *pdev) * already instantiated, do RNG instantiation * In case of SoCs with Management Complex, RNG is managed by MC f/w. */ - if (!ctrlpriv->mc_en && rng_vid >= 4) { + if (!(ctrlpriv->mc_en && pr_support) && rng_vid >= 4) { ctrlpriv->rng4_sh_init = rd_reg32(&ctrl->r4tst[0].rdsta); /* @@ -794,11 +841,11 @@ static int caam_probe(struct platform_device *pdev) * to regenerate these keys before the next POR. */ gen_sk = ctrlpriv->rng4_sh_init & RDSTA_SKVN ? 0 : 1; - ctrlpriv->rng4_sh_init &= RDSTA_IFMASK; + ctrlpriv->rng4_sh_init &= RDSTA_MASK; do { int inst_handles = rd_reg32(&ctrl->r4tst[0].rdsta) & - RDSTA_IFMASK; + RDSTA_MASK; /* * If either SH were instantiated by somebody else * (e.g. 
u-boot) then it is assumed that the entropy @@ -838,7 +885,7 @@ static int caam_probe(struct platform_device *pdev) * Set handles init'ed by this module as the complement of the * already initialized ones */ - ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_IFMASK; + ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_MASK; /* Enable RDB bit so that RNG works faster */ clrsetbits_32(&ctrl->scfgr, 0, SCFGR_RDBENABLE); diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index 4b6854bf896a..e796d3cb9be8 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -1254,6 +1254,8 @@ #define OP_ALG_ICV_OFF (0 << OP_ALG_ICV_SHIFT) #define OP_ALG_ICV_ON (1 << OP_ALG_ICV_SHIFT) +#define OP_ALG_PR_ON BIT(1) + #define OP_ALG_DIR_SHIFT 0 #define OP_ALG_DIR_MASK 1 #define OP_ALG_DECRYPT 0 diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index c191e8fd0fa7..0f810bc13b2b 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -524,9 +524,11 @@ struct rng4tst { u32 rsvd1[40]; #define RDSTA_SKVT 0x80000000 #define RDSTA_SKVN 0x40000000 +#define RDSTA_PR0 BIT(4) +#define RDSTA_PR1 BIT(5) #define RDSTA_IF0 0x00000001 #define RDSTA_IF1 0x00000002 -#define RDSTA_IFMASK (RDSTA_IF1 | RDSTA_IF0) +#define RDSTA_MASK (RDSTA_PR1 | RDSTA_PR0 | RDSTA_IF1 | RDSTA_IF0) u32 rdsta; u32 rsvd2[15]; }; From ea53756d831a1a5db3ca00a12747365e2fcb4bd8 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 19 Mar 2020 09:12:33 -0700 Subject: [PATCH 160/162] crypto: caam - limit single JD RNG output to maximum of 16 bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to follow recommendation in SP800-90C (section "9.4 The Oversampling-NRBG Construction") limit the output of "generate" JD submitted to CAAM. See https://lore.kernel.org/linux-crypto/VI1PR0402MB3485EF10976A4A69F90E5B0F98580@VI1PR0402MB3485.eurprd04.prod.outlook.com/ for more details. This change should make CAAM's hwrng driver good enough to have 1024 quality rating. 
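Since a single job now stores at most 16 bytes, callers lean on the hwrng partial-read contract noted earlier in this series. An illustrative sketch of how a blocking consumer accumulates a larger buffer under that contract (the real loop lives in the hwrng core; read_full is invented here):

/*
 * ->read() may return fewer bytes than requested, so the caller
 * loops. With CAAM_RNG_MAX_FIFO_STORE_SIZE == 16, a 64-byte
 * request becomes four generate jobs, each reseeded from the
 * TRNG because prediction resistance is on.
 */
static int read_full(struct hwrng *rng, u8 *buf, size_t len)
{
	size_t got = 0;
	int ret;

	while (got < len) {
		ret = rng->read(rng, buf + got, len - got, true);
		if (ret <= 0)
			return ret ? ret : -EIO;
		got += ret;
	}
	return 0;
}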
Signed-off-by: Andrey Smirnov
Reviewed-by: Horia Geantă
Cc: Chris Healy
Cc: Lucas Stach
Cc: Horia Geantă
Cc: Herbert Xu
Cc: Iuliana Prodan
Cc: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-imx@nxp.com
Signed-off-by: Herbert Xu
---
 drivers/crypto/caam/caamrng.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index 988bfddbadc6..77d048dfe5d0 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -22,9 +22,7 @@
 #include "jr.h"
 #include "error.h"
 
-#define CAAM_RNG_MAX_FIFO_STORE_SIZE	U16_MAX
-
-#define CAAM_RNG_FIFO_LEN		SZ_32K /* Must be a multiple of 2 */
+#define CAAM_RNG_MAX_FIFO_STORE_SIZE	16
 
 /*
  * Length of used descriptors, see caam_init_desc()
@@ -65,14 +63,15 @@ static void caam_rng_done(struct device *jrdev, u32 *desc, u32 err,
 	complete(jctx->done);
 }
 
-static u32 *caam_init_desc(u32 *desc, dma_addr_t dst_dma, int len)
+static u32 *caam_init_desc(u32 *desc, dma_addr_t dst_dma)
 {
 	init_job_desc(desc, 0);	/* + 1 cmd_sz */
 	/* Generate random bytes: + 1 cmd_sz */
 	append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG |
 			 OP_ALG_PR_ON);
 	/* Store bytes: + 1 cmd_sz + caam_ptr_sz */
-	append_fifo_store(desc, dst_dma, len, FIFOST_TYPE_RNGSTORE);
+	append_fifo_store(desc, dst_dma,
+			  CAAM_RNG_MAX_FIFO_STORE_SIZE, FIFOST_TYPE_RNGSTORE);
 
 	print_hex_dump_debug("rng job desc@: ", DUMP_PREFIX_ADDRESS,
 			     16, 4, desc, desc_bytes(desc), 1);
@@ -92,7 +91,7 @@ static int caam_rng_read_one(struct device *jrdev,
 		.err = &ret,
 	};
 
-	len = min_t(int, len, CAAM_RNG_MAX_FIFO_STORE_SIZE);
+	len = CAAM_RNG_MAX_FIFO_STORE_SIZE;
 
 	dst_dma = dma_map_single(jrdev, dst, len, DMA_FROM_DEVICE);
 	if (dma_mapping_error(jrdev, dst_dma)) {
@@ -102,7 +101,7 @@ static int caam_rng_read_one(struct device *jrdev,
 	init_completion(done);
 	err = caam_jr_enqueue(jrdev,
-			      caam_init_desc(desc, dst_dma, len),
+			      caam_init_desc(desc, dst_dma),
 			      caam_rng_done, &jctx);
 	if (err == -EINPROGRESS) {
 		wait_for_completion(done);
@@ -122,7 +121,7 @@ static void caam_rng_fill_async(struct caam_rng_ctx *ctx)
 	sg_init_table(sg, ARRAY_SIZE(sg));
 	nents = kfifo_dma_in_prepare(&ctx->fifo, sg, ARRAY_SIZE(sg),
-				     CAAM_RNG_FIFO_LEN);
+				     CAAM_RNG_MAX_FIFO_STORE_SIZE);
 	if (!nents)
 		return;
@@ -156,7 +155,7 @@ static int caam_read(struct hwrng *rng, void *dst, size_t max, bool wait)
 	}
 
 	out = kfifo_out(&ctx->fifo, dst, max);
-	if (kfifo_len(&ctx->fifo) <= CAAM_RNG_FIFO_LEN / 2)
+	if (kfifo_is_empty(&ctx->fifo))
 		schedule_work(&ctx->worker);
 
 	return out;
@@ -186,7 +185,8 @@ static int caam_init(struct hwrng *rng)
 	if (!ctx->desc_async)
 		return -ENOMEM;
 
-	if (kfifo_alloc(&ctx->fifo, CAAM_RNG_FIFO_LEN, GFP_DMA | GFP_KERNEL))
+	if (kfifo_alloc(&ctx->fifo, CAAM_RNG_MAX_FIFO_STORE_SIZE,
+			GFP_DMA | GFP_KERNEL))
 		return -ENOMEM;
 
 	INIT_WORK(&ctx->worker, caam_rng_worker);
@@ -246,6 +246,7 @@ int caam_rng_init(struct device *ctrldev)
 	ctx->rng.cleanup = caam_cleanup;
 	ctx->rng.read = caam_read;
 	ctx->rng.priv = (unsigned long)ctx;
+	ctx->rng.quality = 1024;
 
 	dev_info(ctrldev, "registering rng-caam\n");

From 6e4e00d8b68ca7eb30d08afb740033e0d36abe55 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld"
Date: Thu, 19 Mar 2020 12:01:14 -0600
Subject: [PATCH 161/162] crypto: arm[64]/poly1305 - add artifact to .gitignore files

The .S_shipped yields a .S, and the pattern in these directories is to
add that to .gitignore so that git-status doesn't raise a fuss.
Fixes: a6b803b3ddc7 ("crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
Fixes: f569ca164751 ("crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
Reported-by: Emil Renner Berthing
Cc: Ard Biesheuvel
Signed-off-by: Jason A. Donenfeld
Signed-off-by: Herbert Xu
---
 arch/arm/crypto/.gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/crypto/.gitignore b/arch/arm/crypto/.gitignore
index 31e1f538df7d..a3c7ad52a469 100644
--- a/arch/arm/crypto/.gitignore
+++ b/arch/arm/crypto/.gitignore
@@ -1,3 +1,4 @@
 aesbs-core.S
 sha256-core.S
 sha512-core.S
+poly1305-core.S

From fcb90d51c375d09a034993cda262b68499e233a4 Mon Sep 17 00:00:00 2001
From: Lothar Rubusch
Date: Fri, 20 Mar 2020 11:36:31 +0000
Subject: [PATCH 162/162] crypto: af_alg - bool type cosmetics

When working with bool values, the true and false definitions should be
used instead of 1 and 0.

Hopefully I fixed my mailer; apologies for that.

Signed-off-by: Lothar Rubusch
Signed-off-by: Herbert Xu
---
 crypto/af_alg.c     | 10 +++++-----
 crypto/algif_hash.c |  6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 439367a8e95c..b1cd3535c525 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -821,8 +821,8 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
 	struct af_alg_tsgl *sgl;
 	struct af_alg_control con = {};
 	long copied = 0;
-	bool enc = 0;
-	bool init = 0;
+	bool enc = false;
+	bool init = false;
 	int err = 0;
 
 	if (msg->msg_controllen) {
@@ -830,13 +830,13 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
 		if (err)
 			return err;
 
-		init = 1;
+		init = true;
 		switch (con.op) {
 		case ALG_OP_ENCRYPT:
-			enc = 1;
+			enc = true;
 			break;
 		case ALG_OP_DECRYPT:
-			enc = 0;
+			enc = false;
 			break;
 		default:
 			return -EINVAL;
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index 178f4cd75ef1..da1ffa4f7f8d 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -83,7 +83,7 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
 		goto unlock;
 	}
 
-	ctx->more = 0;
+	ctx->more = false;
 
 	while (msg_data_left(msg)) {
 		int len = msg_data_left(msg);
@@ -211,7 +211,7 @@ static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	}
 
 	if (!result || ctx->more) {
-		ctx->more = 0;
+		ctx->more = false;
 		err = crypto_wait_req(crypto_ahash_final(&ctx->req),
 				      &ctx->wait);
 		if (err)
@@ -436,7 +436,7 @@ static int hash_accept_parent_nokey(void *private, struct sock *sk)
 	ctx->result = NULL;
 	ctx->len = len;
-	ctx->more = 0;
+	ctx->more = false;
 	crypto_init_wait(&ctx->wait);
 
 	ask->private = ctx;
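For context on the af_alg.c hunk above, the con.op value comes from
userspace as an ALG_SET_OP control message on the algorithm socket.
Below is a minimal userspace sketch (illustrative only, not part of the
patch; error handling omitted) that exercises this path with AES-ECB:

  #include <string.h>
  #include <unistd.h>
  #include <sys/socket.h>
  #include <sys/uio.h>
  #include <linux/if_alg.h>

  #ifndef SOL_ALG
  #define SOL_ALG 279
  #endif

  int main(void)
  {
          struct sockaddr_alg sa = {
                  .salg_family = AF_ALG,
                  .salg_type   = "skcipher",
                  .salg_name   = "ecb(aes)",
          };
          unsigned char key[16] = { 0 };
          unsigned char pt[16] = "illustrative...";
          unsigned char ct[16];
          struct iovec iov = { .iov_base = pt, .iov_len = sizeof(pt) };
          char cbuf[CMSG_SPACE(sizeof(int))] = { 0 };
          struct msghdr msg = {
                  .msg_iov        = &iov,
                  .msg_iovlen     = 1,
                  .msg_control    = cbuf,
                  .msg_controllen = sizeof(cbuf),
          };
          struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
          int opval = ALG_OP_ENCRYPT;
          int tfm, op;

          tfm = socket(AF_ALG, SOCK_SEQPACKET, 0);
          bind(tfm, (struct sockaddr *)&sa, sizeof(sa));
          setsockopt(tfm, SOL_ALG, ALG_SET_KEY, key, sizeof(key));
          op = accept(tfm, NULL, 0);

          /* Parsed by af_alg_sendmsg() into con.op: init and enc
           * become true in the hunk above. */
          cmsg->cmsg_level = SOL_ALG;
          cmsg->cmsg_type  = ALG_SET_OP;
          cmsg->cmsg_len   = CMSG_LEN(sizeof(int));
          memcpy(CMSG_DATA(cmsg), &opval, sizeof(opval));

          sendmsg(op, &msg, 0);
          read(op, ct, sizeof(ct));  /* one block of AES-ECB ciphertext */

          close(op);
          close(tfm);
          return 0;
  }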