blob: 332a0ea9a4c6d8622647c6f4aade4fdc7cee7418 [file] [log] [blame]
/*
* NVMe storage driver for depthcharge
* Copyright (c) 2015, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef __DRIVERS_STORAGE_NVME_H__
#define __DRIVERS_STORAGE_NVME_H__
#include <pci.h>
#include <stdint.h>
#include <arch/barrier.h>
#include "drivers/storage/blockdev.h"
#include "base/list.h"
//#define DEBUG_PRINTS
#ifdef DEBUG_PRINTS
#define DEBUG(x) x
#else
#define DEBUG(x)
#endif
/* BSD style bit manipulation */
#define SET(t, f) ((t) |= (f))
#define ISSET(t, f) ((t) & (f))
#define CLR(t, f) ((t) &= ~(f))
/* Architecture memory page size
* These should eventually reference the arch header definitions
*/
#define NVME_PAGE_SHIFT 12
#define NVME_PAGE_SIZE (1UL << NVME_PAGE_SHIFT)
/* Max 1 PRP list per transfer */
#define MAX_PRP_LISTS 1
/* 8 bytes per entry */
#define PRP_ENTRY_SHIFT 3
/* 1 page per list */
#define PRP_LIST_SHIFT NVME_PAGE_SHIFT
/* 1 page of memory addressed per entry*/
#define PRP_ENTRY_XFER_SHIFT NVME_PAGE_SHIFT
#define PRP_ENTRIES_PER_LIST (1UL << (PRP_LIST_SHIFT - PRP_ENTRY_SHIFT))
#define NVME_MAX_XFER_BYTES ((MAX_PRP_LISTS * PRP_ENTRIES_PER_LIST ) << PRP_ENTRY_XFER_SHIFT)
/* Loop used to poll for command completions
* timeout in milliseconds
*/
#define WAIT_WHILE(expr, timeout) \
({ \
typeof(timeout) __counter = timeout * 1000; \
typeof(expr) __expr_val; \
while ((__expr_val = (expr)) && __counter--) \
udelay(1); \
__expr_val; \
})
/* Command timeout measured in milliseconds */
#define NVME_GENERIC_TIMEOUT 5000
#define writel_with_flush(a,b) do { writel(a, b); readl(b); } while (0)
typedef int NVME_STATUS;
#define NVME_SUCCESS 0
#define NVME_UNSUPPORTED -1
#define NVME_DEVICE_ERROR -2
#define NVME_OUT_OF_RESOURCES -3
#define NVME_TIMEOUT -4
#define NVME_INVALID_PARAMETER -5
#define NVME_ERROR(err) ((err) < 0?1:0)
#define PCI_CLASS_MASS_STORAGE 0x01 /* mass storage class */
#define PCI_CLASS_MASS_STORAGE_NVM 0x08 /* mass storage sub-class non-volatile memory. */
#define PCI_IF_NVMHCI 0x02 /* mass storage programming interface NVMHCI. */
/* Queue Definitions
* NOTE: The size of the IO queue is tuned for max_transfer_size as
* a performance optimization. Smaller size saves host memory at
* cost of performance.
*/
#define NVME_ASQ_SIZE 2 /* Number of admin submission queue entries, only 2 */
#define NVME_ACQ_SIZE 2 /* Number of admin completion queue entries, only 2 */
#define NVME_CSQ_SIZE 15 /* Number of I/O submission queue entries per queue, min 2, max 64 */
#define NVME_CCQ_SIZE 15 /* Number of I/O completion queue entries per queue, min 2, max 64 */
#define NVME_NUM_QUEUES 2 /* Number of queues (Admin + IO) supported by the driver, only 2 supported */
#define NVME_NUM_IO_QUEUES (NVME_NUM_QUEUES - 1) /* Number of IO queues (not counting Admin Queue) */
#define NVME_ADMIN_QUEUE_INDEX 0 /* Admin queu index must be 0 */
#define NVME_IO_QUEUE_INDEX 1 /* IO queue */
/*
* NVMe Controller Registers
*/
/* controller register offsets */
#define NVME_CAP_OFFSET 0x0000 /* Controller Capabilities */
#define NVME_VER_OFFSET 0x0008 /* Version */
#define NVME_INTMS_OFFSET 0x000c /* Interrupt Mask Set */
#define NVME_INTMC_OFFSET 0x0010 /* Interrupt Mask Clear */
#define NVME_CC_OFFSET 0x0014 /* Controller Configuration */
#define NVME_CSTS_OFFSET 0x001c /* Controller Status */
#define NVME_AQA_OFFSET 0x0024 /* Admin Queue Attributes */
#define NVME_ASQ_OFFSET 0x0028 /* Admin Submission Queue Base Address */
#define NVME_ACQ_OFFSET 0x0030 /* Admin Completion Queue Base Address */
#define NVME_SQ0_OFFSET 0x1000 /* Submission Queue 0 (admin) Tail Doorbell */
#define NVME_CQ0_OFFSET 0x1004 /* Completion Queue 0 (admin) Head Doorbell */
/* 3.1.1 Offset 00h: CAP - Controller Capabilities */
typedef uint64_t NVME_CAP;
#define NVME_CAP_TO(x) (500 * (((x) >> 24) & 0xff)) /* Timeout, ms (TO is in 500ms increments)*/
#define NVME_CAP_DSTRD(x) (1 << (2 + (((x) >> 32) & 0xf))) /* Doorbell Stride, bytes */
#define NVME_CAP_CSS(x) (((x) >> 37) & 0x7f) /* Command Set Supported */
#define NVME_CAP_CSS_NVM (1)
#define NVME_CAP_MPSMIN(x) (12 + (((x) >> 48) & 0xf)) /* Memory Page Size Minimum */
#define NVME_CAP_MQES(x) (((x) & 0xffff) + 1) /* Max Queue Entries Supported per queue */
/* 3.1.5 Offset 14h: CC - Controller Configuration */
typedef uint32_t NVME_CC;
#define NVME_CC_EN (1 << 0)
#define NVME_CC_IOCQES(x) (((x) & 0xf) << 20)
#define NVME_CC_IOSQES(x) (((x) & 0xf) << 16)
/* 3.1.6 Offset 1Ch: CSTS - Controller Status */
typedef uint32_t NVME_CSTS;
#define NVME_CSTS_RDY (1 << 0)
/* 3.1.8 Offset 24h: AQA - Admin Queue Attributes */
typedef uint32_t NVME_AQA;
#define NVME_AQA_ASQS(x) ((x) - 1)
#define NVME_AQA_ACQS(x) (((x) - 1) << 16)
/* 3.1.9 Offset 28h: ASQ - Admin Submission Queue Base Address */
typedef uint64_t NVME_ASQ;
/* 3.1.10 Offset 30h: ACQ - Admin Completion Queue Base Address */
typedef uint64_t NVME_ACQ;
/* 3.1.11 Offset (1000h + ((2y) * (DSTRD bytes)))
* SQyTDBL - Submission Queue y Tail Doorbell
*/
typedef uint32_t NVME_SQTDBL;
/* 3.1.12 Offset (1000h + ((2y + 1) * (DSTRD bytes)))
* CQyHDBL - Completion Queue y Head Doorbell
*/
typedef uint32_t NVME_CQHDBL;
/* These register offsets are defined as 0x1000 + (N * (DSTRD bytes))
* Get the doorbell stride bit shift value from the controller capabilities.
*/
#define NVME_SQTDBL_OFFSET(QID, DSTRD) (0x1000 + ((2 * (QID)) * (DSTRD))) /* Submission Queue y (NVM) Tail Doorbell */
#define NVME_CQHDBL_OFFSET(QID, DSTRD) (0x1000 + (((2 * (QID)) + 1) * (DSTRD))) /* Completion Queue y (NVM) Head Doorbell */
/*
* NVMe Command Set Types
*/
/* NVMe Admin Cmd Opcodes */
#define NVME_ADMIN_CRIOSQ_OPC 1
#define NVME_ADMIN_CRIOSQ_QID(x) (x)
#define NVME_ADMIN_CRIOSQ_QSIZE(x) (((x)-1) << 16)
#define NVME_ADMIN_CRIOSQ_CQID(x) ((x) << 16)
#define NVME_ADMIN_SETFEATURES_OPC 9
#define NVME_ADMIN_SETFEATURES_NUMQUEUES 7
#define NVME_ADMIN_CRIOCQ_OPC 5
#define NVME_ADMIN_CRIOCQ_QID(x) (x)
#define NVME_ADMIN_CRIOCQ_QSIZE(x) (((x)-1) << 16)
#define NVME_ADMIN_IDENTIFY_OPC 6
#define NVME_IO_FLUSH_OPC 0
#define NVME_IO_WRITE_OPC 1
#define NVME_IO_READ_OPC 2
/* Submission Queue */
typedef struct {
uint8_t opc; /* Opcode */
uint8_t flags; /* FUSE and PSDT, only 0 setting supported */
uint16_t cid; /* Command Identifier */
uint32_t nsid; /* Namespace Identifier */
uint64_t rsvd1;
uint64_t mptr; /* Metadata Pointer */
uint64_t prp[2]; /* PRP entries only, SGL not supported */
uint32_t cdw10;
uint32_t cdw11;
uint32_t cdw12;
uint32_t cdw13;
uint32_t cdw14;
uint32_t cdw15;
} NVME_SQ;
/* Completion Queue */
typedef struct {
uint32_t cdw0;
uint32_t rsvd1;
uint16_t sqhd; /* Submission Queue Head Pointer */
uint16_t sqid; /* Submission Queue Identifier */
uint16_t cid; /* Command Identifier */
uint16_t flags;
#define NVME_CQ_FLAGS_PHASE 0x1
#define NVME_CQ_FLAGS_SC(x) (((x) & 0x1FE) >> 1)
#define NVME_CQ_FLAGS_SCT(x) (((x) & 0xE00) >> 9)
} NVME_CQ;
typedef struct {
uint32_t power_flags; /* MP, MPS and NOPS */
uint32_t enlat; /* Entry Latency */
uint32_t exlat; /* Exit Latency */
uint32_t latency_flags;
uint8_t rsvd7[16]; /* Reserved as of Nvm Express 1.1 Spec */
} NVME_PSDESCRIPTOR;
/* Identify Controller Data */
typedef struct {
/* Controller Capabilities and Features 0-255 */
uint16_t vid; /* PCI Vendor ID */
uint16_t ssvid; /* PCI sub-system vendor ID */
uint8_t sn[20]; /* Produce serial number */
uint8_t mn[40]; /* Proeduct model number */
uint8_t fr[8]; /* Firmware Revision */
uint8_t rab; /* Recommended Arbitration Burst */
uint8_t ieee_oiu[3]; /* Organization Unique Identifier */
uint8_t cmic; /* Multi-interface Capabilities */
uint8_t mdts; /* Maximum Data Transfer Size */
uint8_t cntlid[2]; /* Controller ID */
uint8_t rsvd1[176]; /* Reserved as of Nvm Express 1.1 Spec */
//
// Admin Command Set Attributes
//
uint16_t oacs; /* Optional Admin Command Support */
uint8_t acl; /* Abort Command Limit */
uint8_t aerl; /* Async Event Request Limit */
uint8_t frmw; /* Firmware updates */
uint8_t lpa; /* Log Page Attributes */
uint8_t elpe; /* Error Log Page Entries */
uint8_t npss; /* Number of Power States Support */
uint8_t avscc; /* Admin Vendor Specific Command Configuration */
uint8_t apsta; /* Autonomous Power State Transition Attributes */
uint8_t rsvd2[246]; /* Reserved as of Nvm Express 1.1 Spec */
//
// NVM Command Set Attributes
//
uint8_t sqes; /* Submission Queue Entry Size */
uint8_t cqes; /* Completion Queue Entry Size */
uint16_t rsvd3; /* Reserved as of Nvm Express 1.1 Spec */
uint32_t nn; /* Number of Namespaces */
uint16_t oncs; /* Optional NVM Command Support */
uint16_t fuses; /* Fused Operation Support */
uint8_t fna; /* Format NVM Attributes */
uint8_t vwc; /* Volatile Write Cache */
uint16_t awun; /* Atomic Write Unit Normal */
uint16_t awupf; /* Atomic Write Unit Power Fail */
uint8_t nvscc; /* NVM Vendor Specific Command Configuration */
uint8_t rsvd4; /* Reserved as of Nvm Express 1.1 Spec */
uint16_t acwu; /* Atomic Compare & Write Unit */
uint16_t rsvd5; /* Reserved as of Nvm Express 1.1 Spec */
uint32_t sgls; /* SGL Support */
uint8_t rsvd6[164]; /* Reserved as of Nvm Express 1.1 Spec */
//
// I/O Command set Attributes
//
uint8_t rsvd7[1344]; /* Reserved as of Nvm Express 1.1 Spec */
//
// Power State Descriptors
//
NVME_PSDESCRIPTOR ps_descriptor[32];
uint8_t vendor_data[1024]; /* Vendor specific data */
} NVME_ADMIN_CONTROLLER_DATA;
typedef struct {
uint16_t ms; /* Metadata Size */
uint8_t lbads; /* LBA Data Size */
uint8_t rp; /* Relative Performance */
} NVME_LBAFORMAT;
/* Identify Namespace Data */
typedef struct {
uint64_t nsze; /* Namespace Size (total blocks in fm'd namespace) */
uint64_t ncap; /* Namespace Capacity (max number of logical blocks) */
uint64_t nuse; /* Namespace Utilization */
uint8_t nsfeat; /* Namespace Features */
uint8_t nlbaf; /* Number of LBA Formats */
uint8_t flbas; /* Formatted LBA size */
uint8_t mc; /* Metadata Capabilities */
uint8_t dpc; /* End-to-end Data Protection capabilities */
uint8_t dps; /* End-to-end Data Protection Type Settings */
uint8_t nmic; /* Namespace Multi-path I/O + NS Sharing Caps */
uint8_t rescap; /* Reservation Capabilities */
uint8_t rsvd1[88]; /* Reserved as of Nvm Express 1.1 Spec */
uint64_t eui64; /* IEEE Extended Unique Identifier */
NVME_LBAFORMAT lba_format[16];
uint8_t rsvd2[192]; /* Reserved as of Nvm Express 1.1 Spec */
uint8_t vendor_data[3712]; /* Vendor specific data */
} NVME_ADMIN_NAMESPACE_DATA;
typedef struct PrpList {
uint64_t prp_entry[PRP_ENTRIES_PER_LIST];
} PrpList;
/*
* Driver Types
*/
typedef struct NvmeCtrlr {
BlockDevCtrlr ctrlr;
ListNode drives;
pcidev_t dev;
uint32_t ctrlr_regs;
/* local copy of controller CAP register */
NVME_CAP cap;
/* virtual address of identify controller data */
NVME_ADMIN_CONTROLLER_DATA *controller_data;
/* virtual address of pre-allocated PRP Lists */
PrpList *prp_list[NVME_CSQ_SIZE];
/* virtual address of raw buffer, split into queues below */
uint8_t *buffer;
/* virtual addresses of queue buffers */
NVME_SQ *sq_buffer[NVME_NUM_QUEUES];
NVME_CQ *cq_buffer[NVME_NUM_QUEUES];
NVME_SQTDBL sq_t_dbl[NVME_NUM_QUEUES];
NVME_CQHDBL cq_h_dbl[NVME_NUM_QUEUES];
/* current phase of each queue */
uint8_t pt[NVME_NUM_QUEUES];
/* sq head index as of most recent completion */
uint16_t sqhd[NVME_NUM_QUEUES];
/* current command id for each queue */
uint16_t cid[NVME_NUM_QUEUES];
/* Actual IO SQ size accounting for MQES */
uint16_t iosq_sz;
/* Actual IO CQ size accounting for MQES*/
uint16_t iocq_sz;
} NvmeCtrlr;
typedef struct NvmeDrive {
BlockDev dev;
NvmeCtrlr *ctrlr;
uint32_t namespace_id;
ListNode list_node;
} NvmeDrive;
NvmeCtrlr *new_nvme_ctrlr(pcidev_t dev);
#endif /* __DRIVERS_STORAGE_NVME_H__ */