Studying TRIM a little more properly

Day 2 of the ten-day holiday: time to study the TRIM command properly.
All source code below is from the Linux 2.6.39 base.

Terminology

First, the erase/trim/discard argument values defined by the MMC core. As we will see at the bottom of the stack, these end up as cmd.arg for the ERASE command (MMC_ERASE, i.e. CMD38):

/*
 * Erase/trim/discard
 */
#define MMC_ERASE_ARG           0x00000000
#define MMC_SECURE_ERASE_ARG        0x80000000
#define MMC_TRIM_ARG            0x00000001
#define MMC_DISCARD_ARG         0x00000003
#define MMC_SECURE_TRIM1_ARG        0x80000001
#define MMC_SECURE_TRIM2_ARG        0x80008000
#define MMC_SECURE_ARGS         0x80000000
#define MMC_TRIM_ARGS           0x00008001
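
As a reading aid: bit 31 marks the secure variants, and MMC_TRIM_ARGS masks the bits used by the trim family (bit 0 for trim / secure trim step 1, bit 15 for secure trim step 2). A tiny standalone check, with the values copied from above:

#include <assert.h>
#include <stdio.h>

#define MMC_TRIM_ARG            0x00000001
#define MMC_SECURE_TRIM1_ARG    0x80000001
#define MMC_SECURE_TRIM2_ARG    0x80008000
#define MMC_SECURE_ARGS         0x80000000
#define MMC_TRIM_ARGS           0x00008001

int main(void)
{
    /* secure trim step 1 = secure bit + trim bit */
    assert(MMC_SECURE_TRIM1_ARG == (MMC_SECURE_ARGS | MMC_TRIM_ARG));
    /* secure trim step 2 = secure bit + bit 15 */
    assert(MMC_SECURE_TRIM2_ARG == (MMC_SECURE_ARGS | 0x00008000));
    /* MMC_TRIM_ARGS covers exactly the trim-family bits */
    assert((MMC_TRIM_ARG | 0x00008000) == MMC_TRIM_ARGS);
    printf("argument bit layout checks out\n");
    return 0;
}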

ext3

ioctl.c
    case FITRIM: {

        struct super_block *sb = inode->i_sb;
        struct fstrim_range range;
        int ret = 0;

        if (!capable(CAP_SYS_ADMIN))
            return -EPERM;

        if (copy_from_user(&range, (struct fstrim_range *)arg,
                   sizeof(range)))
            return -EFAULT;

        ret = ext3_trim_fs(sb, &range);
        if (ret < 0)
            return ret;

        if (copy_to_user((struct fstrim_range *)arg, &range,
                 sizeof(range)))
            return -EFAULT;

        return 0;
    }
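
For context, this FITRIM ioctl is exactly what fstrim(8) issues from user space. A minimal sketch (the mount point /mnt is a placeholder):

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>       /* FITRIM, struct fstrim_range */

int main(void)
{
    struct fstrim_range range;
    int fd = open("/mnt", O_RDONLY);

    if (fd < 0) {
        perror("open");
        return 1;
    }

    memset(&range, 0, sizeof(range));
    range.start = 0;
    range.len = UINT64_MAX;     /* whole filesystem */
    range.minlen = 0;           /* no minimum extent length */

    if (ioctl(fd, FITRIM, &range) < 0) {
        perror("FITRIM");
        close(fd);
        return 1;
    }

    /* The kernel writes back the number of bytes it trimmed. */
    printf("trimmed %llu bytes\n", (unsigned long long)range.len);
    close(fd);
    return 0;
}

The copy_to_user() at the end of the handler is what makes range.len come back updated.
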
balloc.c
/**
 * ext3_trim_fs() -- trim ioctl handle function
 * @sb:         superblock for filesystem
 * @start:      First Byte to trim
 * @len:        number of Bytes to trim from start
 * @minlen:     minimum extent length in Bytes
 *
 * ext3_trim_fs goes through all allocation groups containing Bytes from
 * start to start+len. For each such group the ext3_trim_all_free function
 * is invoked to trim all free space.
 */
int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
{
    ext3_grpblk_t last_block, first_block, free_blocks;
    unsigned long first_group, last_group;
    unsigned long group, ngroups;
    struct ext3_group_desc *gdp;
    struct ext3_super_block *es = EXT3_SB(sb)->s_es;
    uint64_t start, len, minlen, trimmed;
    ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count);
    int ret = 0;

    start = (range->start >> sb->s_blocksize_bits) +
        le32_to_cpu(es->s_first_data_block);
    len = range->len >> sb->s_blocksize_bits;
    minlen = range->minlen >> sb->s_blocksize_bits;
    trimmed = 0;

    if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)))
        return -EINVAL;
    if (start >= max_blks)
        goto out;
    if (start + len > max_blks)
        len = max_blks - start;

    ngroups = EXT3_SB(sb)->s_groups_count;
    smp_rmb();

    /* Determine first and last group to examine based on start and len */
    ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start,
                     &first_group, &first_block);
    ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) (start + len),
                     &last_group, &last_block);
    last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
    last_block = EXT3_BLOCKS_PER_GROUP(sb);

    if (first_group > last_group)
        return -EINVAL;

    for (group = first_group; group <= last_group; group++) {
        gdp = ext3_get_group_desc(sb, group, NULL);
        if (!gdp)
            break;

        free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
        if (free_blocks < minlen)
            continue;

        /*
         * For all the groups except the last one, last block will
         * always be EXT3_BLOCKS_PER_GROUP(sb), so we only need to
         * change it for the last group in which case first_block +
         * len < EXT3_BLOCKS_PER_GROUP(sb).
         */
        if (first_block + len < EXT3_BLOCKS_PER_GROUP(sb))
            last_block = first_block + len;
        len -= last_block - first_block;

        ret = ext3_trim_all_free(sb, group, first_block,
                    last_block, minlen);
        if (ret < 0)
            break;

        trimmed += ret;
        first_block = 0;
    }

    if (ret >= 0)
        ret = 0;

out:
    range->len = trimmed * sb->s_blocksize;

    return ret;
}
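
To make the unit handling at the top of ext3_trim_fs() concrete, here is a standalone recomputation with assumed parameters: 4 KiB blocks (s_blocksize_bits == 12) and the ext3 default of 32768 blocks per group. The numbers are illustrative only:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    unsigned int blocksize_bits = 12;       /* 4 KiB blocks */
    unsigned int blocks_per_group = 32768;  /* ext3 default for 4 KiB */
    uint64_t first_data_block = 0;          /* 0 for block sizes above 1 KiB */
    uint64_t range_start = 0;
    uint64_t range_len = 1ULL << 30;        /* ask to trim 1 GiB */

    uint64_t start = (range_start >> blocksize_bits) + first_data_block;
    uint64_t len = range_len >> blocksize_bits;

    /* prints: blocks 0..262143 -> groups 0..7 */
    printf("blocks %llu..%llu -> groups %llu..%llu\n",
           (unsigned long long)start,
           (unsigned long long)(start + len - 1),
           (unsigned long long)(start / blocks_per_group),
           (unsigned long long)((start + len - 1) / blocks_per_group));
    return 0;
}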

balloc.c
/**
 * ext3_trim_all_free -- function to trim all free space in alloc. group
 * @sb:         super block for file system
 * @group:      allocation group to trim
 * @start:      first group block to examine
 * @max:        last group block to examine
 * @gdp:        allocation group description structure
 * @minblocks:      minimum extent block count
 *
 * ext3_trim_all_free walks through group's block bitmap searching for free
 * blocks. When the free block is found, it tries to allocate this block and
 * consequent free block to get the biggest free extent possible, until it
 * reaches any used block. Then issue a TRIM command on this extent and free
 * the extent in the block bitmap. This is done until whole group is scanned.
 */
ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
                ext3_grpblk_t start, ext3_grpblk_t max,
                ext3_grpblk_t minblocks)
{
    handle_t *handle;
    ext3_grpblk_t next, free_blocks, bit, freed, count = 0;
    ext3_fsblk_t discard_block;
    struct ext3_sb_info *sbi;
    struct buffer_head *gdp_bh, *bitmap_bh = NULL;
    struct ext3_group_desc *gdp;
    int err = 0, ret = 0;

    /*
     * We will update one block bitmap, and one group descriptor
     */
    handle = ext3_journal_start_sb(sb, 2);
    if (IS_ERR(handle))
        return PTR_ERR(handle);

    bitmap_bh = read_block_bitmap(sb, group);
    if (!bitmap_bh) {
        err = -EIO;
        goto err_out;
    }

    BUFFER_TRACE(bitmap_bh, "getting undo access");
    err = ext3_journal_get_undo_access(handle, bitmap_bh);
    if (err)
        goto err_out;

    gdp = ext3_get_group_desc(sb, group, &gdp_bh);
    if (!gdp) {
        err = -EIO;
        goto err_out;
    }

    BUFFER_TRACE(gdp_bh, "get_write_access");
    err = ext3_journal_get_write_access(handle, gdp_bh);
    if (err)
        goto err_out;

    free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
    sbi = EXT3_SB(sb);

     /* Walk through the whole group */
    while (start < max) {
        start = bitmap_search_next_usable_block(start, bitmap_bh, max);
        if (start < 0)
            break;
        next = start;

        /*
         * Allocate contiguous free extents by setting bits in the
         * block bitmap
         */
        while (next < max
            && claim_block(sb_bgl_lock(sbi, group),
                    next, bitmap_bh)) {
            next++;
        }

         /* We did not claim any blocks */
        if (next == start)
            continue;

        discard_block = (ext3_fsblk_t)start +
                ext3_group_first_block_no(sb, group);

        /* Update counters */
        spin_lock(sb_bgl_lock(sbi, group));
        le16_add_cpu(&gdp->bg_free_blocks_count, start - next);
        spin_unlock(sb_bgl_lock(sbi, group));
        percpu_counter_sub(&sbi->s_freeblocks_counter, next - start);

        free_blocks -= next - start;
        /* Do not issue a TRIM on extents smaller than minblocks */
        if ((next - start) < minblocks)
            goto free_extent;

         /* Send the TRIM command down to the device */
        err = sb_issue_discard(sb, discard_block, next - start,
                       GFP_NOFS, 0);
        count += (next - start);
free_extent:
        freed = 0;

        /*
         * Clear bits in the bitmap
         */
        for (bit = start; bit < next; bit++) {
            BUFFER_TRACE(bitmap_bh, "clear bit");
            if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, group),
                        bit, bitmap_bh->b_data)) {
                ext3_error(sb, __func__,
                    "bit already cleared for block "E3FSBLK,
                     (unsigned long)bit);
                BUFFER_TRACE(bitmap_bh, "bit already cleared");
            } else {
                freed++;
            }
        }

        /* Update counters */
        spin_lock(sb_bgl_lock(sbi, group));
        le16_add_cpu(&gdp->bg_free_blocks_count, freed);
        spin_unlock(sb_bgl_lock(sbi, group));
        percpu_counter_add(&sbi->s_freeblocks_counter, freed);

        start = next;
        if (err < 0) {
            if (err != -EOPNOTSUPP)
                ext3_warning(sb, __func__, "Discard command "
                         "returned error %d\n", err);
            break;
        }

        if (fatal_signal_pending(current)) {
            err = -ERESTARTSYS;
            break;
        }

        cond_resched();

        /* No more suitable extents */
        if (free_blocks < minblocks)
            break;
    }

    /* We dirtied the bitmap block */
    BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
    ret = ext3_journal_dirty_metadata(handle, bitmap_bh);
    if (!err)
        err = ret;

    /* And the group descriptor block */
    BUFFER_TRACE(gdp_bh, "dirtied group descriptor block");
    ret = ext3_journal_dirty_metadata(handle, gdp_bh);
    if (!err)
        err = ret;

    ext3_debug("trimmed %d blocks in the group %d\n",
        count, group);

err_out:
    if (err)
        count = err;
    ext3_journal_stop(handle);
    brelse(bitmap_bh);

    return count;
}

block device

blkdev.h
static inline int sb_issue_discard(struct super_block *sb, sector_t block,
        sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
{
    return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9),
                    nr_blocks << (sb->s_blocksize_bits - 9),
                    gfp_mask, flags);
}
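
The shifts here convert filesystem blocks into the block layer's 512-byte sectors. A quick illustration, assuming 4 KiB blocks:

#include <stdio.h>

int main(void)
{
    unsigned int blocksize_bits = 12;   /* 4 KiB filesystem blocks */
    unsigned long long block = 100;
    unsigned long long nr_blocks = 8;

    /* 2^(12-9) == 8 sectors per block: block 100 -> sector 800 */
    printf("sector %llu, nr_sects %llu\n",
           block << (blocksize_bits - 9),
           nr_blocks << (blocksize_bits - 9));
    return 0;
}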

blk-lib.c
/**
 * blkdev_issue_discard - queue a discard
 * @bdev:   blockdev to issue discard for
 * @sector: start sector
 * @nr_sects:   number of sectors to discard
 * @gfp_mask:   memory allocation flags (for bio_alloc)
 * @flags:  BLKDEV_IFL_* flags to control behaviour
 *
 * Description:
 *    Issue a discard request for the sectors in question.
 */
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
        sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
{
    DECLARE_COMPLETION_ONSTACK(wait);
    struct request_queue *q = bdev_get_queue(bdev);
    int type = REQ_WRITE | REQ_DISCARD;
    unsigned int max_discard_sectors;
    struct bio *bio;
    int ret = 0;

    if (!q)
        return -ENXIO;

    if (!blk_queue_discard(q))
        return -EOPNOTSUPP;

    /*
     * Ensure that max_discard_sectors is of the proper
     * granularity
     */
    max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
    if (q->limits.discard_granularity) {
        unsigned int disc_sects = q->limits.discard_granularity >> 9;

        max_discard_sectors &= ~(disc_sects - 1);
    }

    if (flags & BLKDEV_DISCARD_SECURE) {
        if (!blk_queue_secdiscard(q))
            return -EOPNOTSUPP;
        type |= REQ_SECURE;
    }

    while (nr_sects && !ret) {
        bio = bio_alloc(gfp_mask, 1);
        if (!bio) {
            ret = -ENOMEM;
            break;
        }

        bio->bi_sector = sector;
        bio->bi_end_io = blkdev_discard_end_io;
        bio->bi_bdev = bdev;
        bio->bi_private = &wait;

        if (nr_sects > max_discard_sectors) {
            bio->bi_size = max_discard_sectors << 9;
            nr_sects -= max_discard_sectors;
            sector += max_discard_sectors;
        } else {
            bio->bi_size = nr_sects << 9;
            nr_sects = 0;
        }

        bio_get(bio);
        submit_bio(type, bio);

        wait_for_completion(&wait);

        if (bio_flagged(bio, BIO_EOPNOTSUPP))
            ret = -EOPNOTSUPP;
        else if (!bio_flagged(bio, BIO_UPTODATE))
            ret = -EIO;
        bio_put(bio);
    }

    return ret;
}
EXPORT_SYMBOL(blkdev_issue_discard);
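
Incidentally, the same blkdev_issue_discard() path can be driven from user space without any filesystem at all, via the BLKDISCARD ioctl on the block device itself (this is what blkdiscard(8) does). A minimal sketch; /dev/mmcblk0 is a placeholder:

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>       /* BLKDISCARD */

int main(void)
{
    uint64_t range[2] = { 0, 1 << 20 };  /* offset and length, in bytes */
    int fd = open("/dev/mmcblk0", O_WRONLY);

    if (fd < 0) {
        perror("open");
        return 1;
    }
    if (ioctl(fd, BLKDISCARD, &range) < 0)   /* discard the first 1 MiB */
        perror("BLKDISCARD");
    close(fd);
    return 0;
}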

The path from here on down through the block layer is already summarized elsewhere.

Key points so far:

  • requests arrive flagged REQ_WRITE | REQ_DISCARD, or
  • REQ_WRITE | REQ_DISCARD | REQ_SECURE (secure discard).

mmc/card

block.c
module_init(mmc_blk_init);
module_exit(mmc_blk_exit);

static struct mmc_driver mmc_driver = {
    .drv        = {
        .name   = "mmcblk",
    },
    .probe      = mmc_blk_probe,
    .remove     = mmc_blk_remove,
    .suspend    = mmc_blk_suspend,
    .resume     = mmc_blk_resume,
};

static int __init mmc_blk_init(void)
{
    int res;

    if (perdev_minors != CONFIG_MMC_BLOCK_MINORS)
        pr_info("mmcblk: using %d minors per device\n", perdev_minors);

    max_devices = 256 / perdev_minors;

    res = register_blkdev(MMC_BLOCK_MAJOR, "mmc");
    if (res)
        goto out;

    res = mmc_register_driver(&mmc_driver);
    if (res)
        goto out2;

    return 0;
 out2:
    unregister_blkdev(MMC_BLOCK_MAJOR, "mmc");
 out:
    return res;
}

static int mmc_blk_probe(struct mmc_card *card)
{
    struct mmc_blk_data *md;
    int err;
    char cap_str[10];

    /*
     * Check that the card supports the command class(es) we need.
     */
    if (!(card->csd.cmdclass & CCC_BLOCK_READ))
        return -ENODEV;

    md = mmc_blk_alloc(card);
    if (IS_ERR(md))
        return PTR_ERR(md);

    err = mmc_blk_set_blksize(md, card);
    if (err)
        goto out;

    string_get_size((u64)get_capacity(md->disk) << 9, STRING_UNITS_2,
            cap_str, sizeof(cap_str));
    printk(KERN_INFO "%s: %s %s %s %s\n",
        md->disk->disk_name, mmc_card_id(card), mmc_card_name(card),
        cap_str, md->read_only ? "(ro)" : "");

    mmc_set_drvdata(card, md);
    add_disk(md->disk);
    return 0;

 out:
    mmc_cleanup_queue(&md->queue);
    mmc_blk_put(md);

    return err;
}

static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card)
{
    struct mmc_blk_data *md;
    int devidx, ret;

    devidx = find_first_zero_bit(dev_use, max_devices);
    if (devidx >= max_devices)
        return ERR_PTR(-ENOSPC);
    __set_bit(devidx, dev_use);

    md = kzalloc(sizeof(struct mmc_blk_data), GFP_KERNEL);
    if (!md) {
        ret = -ENOMEM;
        goto out;
    }


    /*
     * Set the read-only status based on the supported commands
     * and the write protect switch.
     */
    md->read_only = mmc_blk_readonly(card);

    md->disk = alloc_disk(perdev_minors);
    if (md->disk == NULL) {
        ret = -ENOMEM;
        goto err_kfree;
    }

    spin_lock_init(&md->lock);
    md->usage = 1;

    ret = mmc_init_queue(&md->queue, card, &md->lock);
    if (ret)
        goto err_putdisk;

    md->queue.issue_fn = mmc_blk_issue_rq;
    md->queue.data = md;

    md->disk->major = MMC_BLOCK_MAJOR;
    md->disk->first_minor = devidx * perdev_minors;
    md->disk->fops = &mmc_bdops;
    md->disk->private_data = md;
    md->disk->queue = md->queue.queue;
    md->disk->driverfs_dev = &card->dev;
    set_disk_ro(md->disk, md->read_only);

    /*
     * As discussed on lkml, GENHD_FL_REMOVABLE should:
     *
     * - be set for removable media with permanent block devices
     * - be unset for removable block devices with permanent media
     *
     * Since MMC block devices clearly fall under the second
     * case, we do not set GENHD_FL_REMOVABLE.  Userspace
     * should use the block device creation/destruction hotplug
     * messages to tell when the card is present.
     */

    snprintf(md->disk->disk_name, sizeof(md->disk->disk_name),
        "mmcblk%d", devidx);

    blk_queue_logical_block_size(md->queue.queue, 512);

    if (!mmc_card_sd(card) && mmc_card_blockaddr(card)) {
        /*
         * The EXT_CSD sector count is the number of 512-byte
         * sectors.
         */
        set_capacity(md->disk, card->ext_csd.sectors);
    } else {
        /*
         * The CSD capacity field is in units of read_blkbits.
         * set_capacity takes units of 512 bytes.
         */
        set_capacity(md->disk,
            card->csd.capacity << (card->csd.read_blkbits - 9));
    }
    return md;

 err_putdisk:
    put_disk(md->disk);
 err_kfree:
    kfree(md);
 out:
    return ERR_PTR(ret);
}

mmc_init_queue()

queue.c
/**
 * mmc_init_queue - initialise a queue structure.
 * @mq: mmc queue
 * @card: mmc card to attach this queue
 * @lock: queue lock
 *
 * Initialise a MMC card request queue.
 */
int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock)
{
    struct mmc_host *host = card->host;
    u64 limit = BLK_BOUNCE_HIGH;
    int ret;

    if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
        limit = *mmc_dev(host)->dma_mask;

    mq->card = card;
    mq->queue = blk_init_queue(mmc_request, lock);
    if (!mq->queue)
        return -ENOMEM;

    mq->queue->queuedata = mq;
    mq->req = NULL;

    blk_queue_prep_rq(mq->queue, mmc_prep_request);
    queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue);
    if (mmc_can_erase(card)) {
        queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue);
        mq->queue->limits.max_discard_sectors = UINT_MAX;
        if (card->erased_byte == 0)
            mq->queue->limits.discard_zeroes_data = 1;
        if (!mmc_can_trim(card) && is_power_of_2(card->erase_size)) {
            mq->queue->limits.discard_granularity =
                            card->erase_size << 9;
            mq->queue->limits.discard_alignment =
                            card->erase_size << 9;
        }
        if (mmc_can_secure_erase_trim(card))
            queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD,
                        mq->queue);
    }

#ifdef CONFIG_MMC_BLOCK_BOUNCE
    if (host->max_segs == 1) {
        unsigned int bouncesz;

        bouncesz = MMC_QUEUE_BOUNCESZ;

        if (bouncesz > host->max_req_size)
            bouncesz = host->max_req_size;
        if (bouncesz > host->max_seg_size)
            bouncesz = host->max_seg_size;
        if (bouncesz > (host->max_blk_count * 512))
            bouncesz = host->max_blk_count * 512;

        if (bouncesz > 512) {
            mq->bounce_buf = kmalloc(bouncesz, GFP_KERNEL);
            if (!mq->bounce_buf) {
                printk(KERN_WARNING "%s: unable to "
                    "allocate bounce buffer\n",
                    mmc_card_name(card));
            }
        }

        if (mq->bounce_buf) {
            blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
            blk_queue_max_hw_sectors(mq->queue, bouncesz / 512);
            blk_queue_max_segments(mq->queue, bouncesz / 512);
            blk_queue_max_segment_size(mq->queue, bouncesz);

            mq->sg = kmalloc(sizeof(struct scatterlist),
                GFP_KERNEL);
            if (!mq->sg) {
                ret = -ENOMEM;
                goto cleanup_queue;
            }
            sg_init_table(mq->sg, 1);

            mq->bounce_sg = kmalloc(sizeof(struct scatterlist) *
                bouncesz / 512, GFP_KERNEL);
            if (!mq->bounce_sg) {
                ret = -ENOMEM;
                goto cleanup_queue;
            }
            sg_init_table(mq->bounce_sg, bouncesz / 512);
        }
    }
#endif

    if (!mq->bounce_buf) {
        blk_queue_bounce_limit(mq->queue, limit);
        blk_queue_max_hw_sectors(mq->queue,
            min(host->max_blk_count, host->max_req_size / 512));
        blk_queue_max_segments(mq->queue, host->max_segs);
        blk_queue_max_segment_size(mq->queue, host->max_seg_size);

        mq->sg = kmalloc(sizeof(struct scatterlist) *
            host->max_segs, GFP_KERNEL);
        if (!mq->sg) {
            ret = -ENOMEM;
            goto cleanup_queue;
        }
        sg_init_table(mq->sg, host->max_segs);
    }

    sema_init(&mq->thread_sem, 1);

    mq->thread = kthread_run(mmc_queue_thread, mq, "mmcqd/%d",
        host->index);

    if (IS_ERR(mq->thread)) {
        ret = PTR_ERR(mq->thread);
        goto free_bounce_sg;
    }

    return 0;
 free_bounce_sg:
    if (mq->bounce_sg)
        kfree(mq->bounce_sg);
    mq->bounce_sg = NULL;
 cleanup_queue:
    if (mq->sg)
        kfree(mq->sg);
    mq->sg = NULL;
    if (mq->bounce_buf)
        kfree(mq->bounce_buf);
    mq->bounce_buf = NULL;
    blk_cleanup_queue(mq->queue);
    return ret;
}

static int mmc_queue_thread(void *d)
{
    struct mmc_queue *mq = d;
    struct request_queue *q = mq->queue;

    current->flags |= PF_MEMALLOC;

    down(&mq->thread_sem);
    do {
        struct request *req = NULL;

        spin_lock_irq(q->queue_lock);
        set_current_state(TASK_INTERRUPTIBLE);
        req = blk_fetch_request(q);
        mq->req = req;
        spin_unlock_irq(q->queue_lock);

        if (!req) {
            if (kthread_should_stop()) {
                set_current_state(TASK_RUNNING);
                break;
            }
            up(&mq->thread_sem);
            schedule();
            down(&mq->thread_sem);
            continue;
        }
        set_current_state(TASK_RUNNING);

        mq->issue_fn(mq, req);
    } while (1);
    up(&mq->thread_sem);

    return 0;
}

Recall that mmc_blk_alloc() set md->queue.issue_fn = mmc_blk_issue_rq, so the mq->issue_fn(mq, req) call in the queue thread lands in mmc_blk_issue_rq().

block.c
static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
{
    if (req->cmd_flags & REQ_DISCARD) {
        if (req->cmd_flags & REQ_SECURE)
            return mmc_blk_issue_secdiscard_rq(mq, req);
        else
            return mmc_blk_issue_discard_rq(mq, req);
    } else {
        return mmc_blk_issue_rw_rq(mq, req);
    }
}

static int mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
{
    struct mmc_blk_data *md = mq->data;
    struct mmc_card *card = md->queue.card;
    unsigned int from, nr, arg;
    int err = 0;

    mmc_claim_host(card->host);

    if (!mmc_can_erase(card)) {
        err = -EOPNOTSUPP;
        goto out;
    }

    from = blk_rq_pos(req);
    nr = blk_rq_sectors(req);

    if (mmc_can_trim(card))
        arg = MMC_TRIM_ARG;
    else
        arg = MMC_ERASE_ARG;

    err = mmc_erase(card, from, nr, arg);
out:
    spin_lock_irq(&md->lock);
    __blk_end_request(req, err, blk_rq_bytes(req));
    spin_unlock_irq(&md->lock);

    mmc_release_host(card->host);

    return err ? 0 : 1;
}

static int mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
                       struct request *req)
{
    struct mmc_blk_data *md = mq->data;
    struct mmc_card *card = md->queue.card;
    unsigned int from, nr, arg;
    int err = 0;

    mmc_claim_host(card->host);

    if (!mmc_can_secure_erase_trim(card)) {
        err = -EOPNOTSUPP;
        goto out;
    }

    from = blk_rq_pos(req);
    nr = blk_rq_sectors(req);

    if (mmc_can_trim(card) && !mmc_erase_group_aligned(card, from, nr))
        arg = MMC_SECURE_TRIM1_ARG;
    else
        arg = MMC_SECURE_ERASE_ARG;

    err = mmc_erase(card, from, nr, arg);
    if (!err && arg == MMC_SECURE_TRIM1_ARG)
        err = mmc_erase(card, from, nr, MMC_SECURE_TRIM2_ARG);
out:
    spin_lock_irq(&md->lock);
    __blk_end_request(req, err, blk_rq_bytes(req));
    spin_unlock_irq(&md->lock);

    mmc_release_host(card->host);

    return err ? 0 : 1;
}

So mmc_erase() gets called in one of four patterns:
- mmc_erase(card, from, nr, MMC_TRIM_ARG)
- mmc_erase(card, from, nr, MMC_ERASE_ARG)
- mmc_erase(card, from, nr, MMC_SECURE_ERASE_ARG)
- mmc_erase(card, from, nr, MMC_SECURE_TRIM1_ARG), followed by
  mmc_erase(card, from, nr, MMC_SECURE_TRIM2_ARG)

Which one is used depends on whether the request is a secure discard, and on:
- mmc_can_trim()
- mmc_erase_group_aligned()

The selection logic condenses to the sketch below.
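
Here is that logic from mmc_blk_issue_discard_rq() and mmc_blk_issue_secdiscard_rq() restated in one place. The function name pick_erase_arg is made up, and can_trim / group_aligned stand in for mmc_can_trim() and mmc_erase_group_aligned():

#include <stdbool.h>
#include <stdio.h>

static const char *pick_erase_arg(bool secure, bool can_trim,
                                  bool group_aligned)
{
    if (!secure)                    /* plain discard */
        return can_trim ? "MMC_TRIM_ARG" : "MMC_ERASE_ARG";
    if (can_trim && !group_aligned) /* secure discard, unaligned */
        return "MMC_SECURE_TRIM1_ARG then MMC_SECURE_TRIM2_ARG";
    return "MMC_SECURE_ERASE_ARG";  /* secure discard, aligned */
}

int main(void)
{
    printf("%s\n", pick_erase_arg(false, true,  false));
    printf("%s\n", pick_erase_arg(true,  true,  false));
    printf("%s\n", pick_erase_arg(true,  false, true));
    return 0;
}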

mmc/core

core.c

int mmc_can_trim(struct mmc_card *card)
{
    if (card->ext_csd.sec_feature_support & EXT_CSD_SEC_GB_CL_EN)
        return 1;
    return 0;
}

/**
 * mmc_erase - erase sectors.
 * @card: card to erase
 * @from: first sector to erase
 * @nr: number of sectors to erase
 * @arg: erase command argument (SD supports only %MMC_ERASE_ARG)
 *
 * Caller must claim host before calling this function.
 */
int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr,
          unsigned int arg)
{
    unsigned int rem, to = from + nr;

    if (!(card->host->caps & MMC_CAP_ERASE) ||
        !(card->csd.cmdclass & CCC_ERASE))
        return -EOPNOTSUPP;

    if (!card->erase_size)
        return -EOPNOTSUPP;

    if (mmc_card_sd(card) && arg != MMC_ERASE_ARG)
        return -EOPNOTSUPP;

    if ((arg & MMC_SECURE_ARGS) &&
        !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN))
        return -EOPNOTSUPP;

    if ((arg & MMC_TRIM_ARGS) &&
        !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_GB_CL_EN))
        return -EOPNOTSUPP;

    if (arg == MMC_SECURE_ERASE_ARG) {
        if (from % card->erase_size || nr % card->erase_size)
            return -EINVAL;
    }

    if (arg == MMC_ERASE_ARG) {
        rem = from % card->erase_size;
        if (rem) {
            rem = card->erase_size - rem;
            from += rem;
            if (nr > rem)
                nr -= rem;
            else
                return 0;
        }
        rem = nr % card->erase_size;
        if (rem)
            nr -= rem;
    }

    if (nr == 0)
        return 0;

    to = from + nr;

    if (to <= from)
        return -EINVAL;

    /* 'from' and 'to' are inclusive */
    to -= 1;

    return mmc_do_erase(card, from, to, arg);
}
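
Note that the MMC_ERASE_ARG branch above shrinks an unaligned request to whole erase groups (the trim variants operate on write blocks, so they skip this rounding). A worked example with an assumed erase_size of 1024 sectors:

#include <stdio.h>

int main(void)
{
    unsigned int erase_size = 1024;  /* assumed erase group, in sectors */
    unsigned int from = 1000, nr = 5000, rem;

    rem = from % erase_size;
    if (rem) {                   /* round 'from' up to a group boundary */
        rem = erase_size - rem;
        from += rem;
        nr = (nr > rem) ? nr - rem : 0;
    }
    nr -= nr % erase_size;       /* round 'nr' down to whole groups */

    /* prints: erase sectors 1024..5119 (4096 sectors) */
    printf("erase sectors %u..%u (%u sectors)\n", from, from + nr - 1, nr);
    return 0;
}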

static int mmc_do_erase(struct mmc_card *card, unsigned int from,
            unsigned int to, unsigned int arg)
{
    struct mmc_command cmd;
    unsigned int qty = 0;
    int err;

    /*
     * qty is used to calculate the erase timeout which depends on how many
     * erase groups (or allocation units in SD terminology) are affected.
     * We count erasing part of an erase group as one erase group.
     * For SD, the allocation units are always a power of 2.  For MMC, the
     * erase group size is almost certainly also power of 2, but it does not
     * seem to insist on that in the JEDEC standard, so we fall back to
     * division in that case.  SD may not specify an allocation unit size,
     * in which case the timeout is based on the number of write blocks.
     *
     * Note that the timeout for secure trim 2 will only be correct if the
     * number of erase groups specified is the same as the total of all
     * preceding secure trim 1 commands.  Since the power may have been
     * lost since the secure trim 1 commands occurred, it is generally
     * impossible to calculate the secure trim 2 timeout correctly.
     */
    if (card->erase_shift)
        qty += ((to >> card->erase_shift) -
            (from >> card->erase_shift)) + 1;
    else if (mmc_card_sd(card))
        qty += to - from + 1;
    else
        qty += ((to / card->erase_size) -
            (from / card->erase_size)) + 1;

    if (!mmc_card_blockaddr(card)) {
        from <<= 9;
        to <<= 9;
    }

    memset(&cmd, 0, sizeof(struct mmc_command));
    if (mmc_card_sd(card))
        cmd.opcode = SD_ERASE_WR_BLK_START;
    else
        cmd.opcode = MMC_ERASE_GROUP_START;
    cmd.arg = from;
    cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_AC;
    err = mmc_wait_for_cmd(card->host, &cmd, 0);
    if (err) {
        printk(KERN_ERR "mmc_erase: group start error %d, "
               "status %#x\n", err, cmd.resp[0]);
        err = -EINVAL;
        goto out;
    }

    memset(&cmd, 0, sizeof(struct mmc_command));
    if (mmc_card_sd(card))
        cmd.opcode = SD_ERASE_WR_BLK_END;
    else
        cmd.opcode = MMC_ERASE_GROUP_END;
    cmd.arg = to;
    cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_AC;
    err = mmc_wait_for_cmd(card->host, &cmd, 0);
    if (err) {
        printk(KERN_ERR "mmc_erase: group end error %d, status %#x\n",
               err, cmd.resp[0]);
        err = -EINVAL;
        goto out;
    }

    memset(&cmd, 0, sizeof(struct mmc_command));
    cmd.opcode = MMC_ERASE;
    cmd.arg = arg;
    cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
    mmc_set_erase_timeout(card, &cmd, arg, qty);
    err = mmc_wait_for_cmd(card->host, &cmd, 0);
    if (err) {
        printk(KERN_ERR "mmc_erase: erase error %d, status %#x\n",
               err, cmd.resp[0]);
        err = -EIO;
        goto out;
    }

    if (mmc_host_is_spi(card->host))
        goto out;

    do {
        memset(&cmd, 0, sizeof(struct mmc_command));
        cmd.opcode = MMC_SEND_STATUS;
        cmd.arg = card->rca << 16;
        cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
        /* Do not retry else we can't see errors */
        err = mmc_wait_for_cmd(card->host, &cmd, 0);
        if (err || (cmd.resp[0] & 0xFDF92000)) {
            printk(KERN_ERR "error %d requesting status %#x\n",
                err, cmd.resp[0]);
            err = -EIO;
            goto out;
        }
    } while (!(cmd.resp[0] & R1_READY_FOR_DATA) ||
         R1_CURRENT_STATE(cmd.resp[0]) == 7);
out:
    return err;
}

The crux of mmc_do_erase() is these three lines: the erase/trim variant is selected purely by the argument passed with MMC_ERASE (CMD38):

cmd.opcode = MMC_ERASE;
cmd.arg = arg;
err = mmc_wait_for_cmd(card->host, &cmd, 0);

/**
 *  mmc_wait_for_cmd - start a command and wait for completion
 *  @host: MMC host to start command
 *  @cmd: MMC command to start
 *  @retries: maximum number of retries
 *
 *  Start a new MMC command for a host, and wait for the command
 *  to complete.  Return any error that occurred while the command
 *  was executing.  Do not attempt to parse the response.
 */
int mmc_wait_for_cmd(struct mmc_host *host, struct mmc_command *cmd, int retries)
{
    struct mmc_request mrq;

    WARN_ON(!host->claimed);

    memset(&mrq, 0, sizeof(struct mmc_request));

    memset(cmd->resp, 0, sizeof(cmd->resp));
    cmd->retries = retries;

    mrq.cmd = cmd;
    cmd->data = NULL;

    mmc_wait_for_req(host, &mrq);

    return cmd->error;
}

/**
 *  mmc_wait_for_req - start a request and wait for completion
 *  @host: MMC host to start command
 *  @mrq: MMC request to start
 *
 *  Start a new MMC custom command request for a host, and wait
 *  for the command to complete. Does not attempt to parse the
 *  response.
 */
void mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq)
{
    DECLARE_COMPLETION_ONSTACK(complete);

    mrq->done_data = &complete;
    mrq->done = mmc_wait_done;

    mmc_start_request(host, mrq);

    wait_for_completion(&complete);
}

static void
mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
{
#ifdef CONFIG_MMC_DEBUG
    unsigned int i, sz;
    struct scatterlist *sg;
#endif

    pr_debug("%s: starting CMD%u arg %08x flags %08x\n",
         mmc_hostname(host), mrq->cmd->opcode,
         mrq->cmd->arg, mrq->cmd->flags);

    if (mrq->data) {
        pr_debug("%s:     blksz %d blocks %d flags %08x "
            "tsac %d ms nsac %d\n",
            mmc_hostname(host), mrq->data->blksz,
            mrq->data->blocks, mrq->data->flags,
            mrq->data->timeout_ns / 1000000,
            mrq->data->timeout_clks);
    }

    if (mrq->stop) {
        pr_debug("%s:     CMD%u arg %08x flags %08x\n",
             mmc_hostname(host), mrq->stop->opcode,
             mrq->stop->arg, mrq->stop->flags);
    }

    WARN_ON(!host->claimed);

    mrq->cmd->error = 0;
    mrq->cmd->mrq = mrq;
    if (mrq->data) {
        BUG_ON(mrq->data->blksz > host->max_blk_size);
        BUG_ON(mrq->data->blocks > host->max_blk_count);
        BUG_ON(mrq->data->blocks * mrq->data->blksz >
            host->max_req_size);

#ifdef CONFIG_MMC_DEBUG
        sz = 0;
        for_each_sg(mrq->data->sg, sg, mrq->data->sg_len, i)
            sz += sg->length;
        BUG_ON(sz != mrq->data->blocks * mrq->data->blksz);
#endif

        mrq->cmd->data = mrq->data;
        mrq->data->error = 0;
        mrq->data->mrq = mrq;
        if (mrq->stop) {
            mrq->data->stop = mrq->stop;
            mrq->stop->error = 0;
            mrq->stop->mrq = mrq;
        }
    }
    mmc_host_clk_ungate(host);
    led_trigger_event(host->led, LED_FULL);
    host->ops->request(host, mrq);
}

Here, at last, we cross into the host controller layer:

host->ops->request(host, mrq);

mmc/host

sdhci.c

static const struct mmc_host_ops sdhci_ops = {
    .request    = sdhci_request,
    .set_ios    = sdhci_set_ios,
    .get_ro     = sdhci_get_ro,
    .enable_sdio_irq = sdhci_enable_sdio_irq,
};

/*****************************************************************************\
 *                                                                           *
 * MMC callbacks                                                             *
 *                                                                           *
\*****************************************************************************/

static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
{
    struct sdhci_host *host;
    bool present;
    unsigned long flags;

    host = mmc_priv(mmc);

    spin_lock_irqsave(&host->lock, flags);

    WARN_ON(host->mrq != NULL);

#ifndef SDHCI_USE_LEDS_CLASS
    sdhci_activate_led(host);
#endif
    if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12) {
        if (mrq->stop) {
            mrq->data->stop = NULL;
            mrq->stop = NULL;
        }
    }

    host->mrq = mrq;

    /* If polling, assume that the card is always present. */
    if (host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION)
        present = true;
    else
        present = sdhci_readl(host, SDHCI_PRESENT_STATE) &
                SDHCI_CARD_PRESENT;

    if (!present || host->flags & SDHCI_DEVICE_DEAD) {
        host->mrq->cmd->error = -ENOMEDIUM;
        tasklet_schedule(&host->finish_tasklet);
    } else
        sdhci_send_command(host, mrq->cmd);

    mmiowb();
    spin_unlock_irqrestore(&host->lock, flags);
}

static void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd)
{
    int flags;
    u32 mask;
    unsigned long timeout;

    WARN_ON(host->cmd);

    /* Wait max 10 ms */
    timeout = 10;

    mask = SDHCI_CMD_INHIBIT;
    if ((cmd->data != NULL) || (cmd->flags & MMC_RSP_BUSY))
        mask |= SDHCI_DATA_INHIBIT;

    /* We shouldn't wait for data inhibit for stop commands, even
       though they might use busy signaling */
    if (host->mrq->data && (cmd == host->mrq->data->stop))
        mask &= ~SDHCI_DATA_INHIBIT;

    while (sdhci_readl(host, SDHCI_PRESENT_STATE) & mask) {
        if (timeout == 0) {
            printk(KERN_ERR "%s: Controller never released "
                "inhibit bit(s).\n", mmc_hostname(host->mmc));
            sdhci_dumpregs(host);
            cmd->error = -EIO;
            tasklet_schedule(&host->finish_tasklet);
            return;
        }
        timeout--;
        mdelay(1);
    }

    mod_timer(&host->timer, jiffies + 10 * HZ);

    host->cmd = cmd;

    sdhci_prepare_data(host, cmd->data);

    sdhci_writel(host, cmd->arg, SDHCI_ARGUMENT);

    sdhci_set_transfer_mode(host, cmd->data);

    if ((cmd->flags & MMC_RSP_136) && (cmd->flags & MMC_RSP_BUSY)) {
        printk(KERN_ERR "%s: Unsupported response type!\n",
            mmc_hostname(host->mmc));
        cmd->error = -EINVAL;
        tasklet_schedule(&host->finish_tasklet);
        return;
    }

    if (!(cmd->flags & MMC_RSP_PRESENT))
        flags = SDHCI_CMD_RESP_NONE;
    else if (cmd->flags & MMC_RSP_136)
        flags = SDHCI_CMD_RESP_LONG;
    else if (cmd->flags & MMC_RSP_BUSY)
        flags = SDHCI_CMD_RESP_SHORT_BUSY;
    else
        flags = SDHCI_CMD_RESP_SHORT;

    if (cmd->flags & MMC_RSP_CRC)
        flags |= SDHCI_CMD_CRC;
    if (cmd->flags & MMC_RSP_OPCODE)
        flags |= SDHCI_CMD_INDEX;
    if (cmd->data)
        flags |= SDHCI_CMD_DATA;

    sdhci_writew(host, SDHCI_MAKE_CMD(cmd->opcode, flags), SDHCI_COMMAND);
}
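
Out of curiosity, what actually lands in the SDHCI_COMMAND register for CMD38? A sketch, with the macro and flag values restated from my reading of sdhci.h; treat them as assumptions and double-check against the header:

#include <stdio.h>

/* Restated from sdhci.h -- assumption, verify against the source. */
#define SDHCI_CMD_RESP_SHORT_BUSY   0x03
#define SDHCI_CMD_CRC               0x08
#define SDHCI_CMD_INDEX             0x10
#define SDHCI_MAKE_CMD(c, f)        ((((c) & 0xff) << 8) | ((f) & 0xff))

int main(void)
{
    /* CMD38 uses an R1b response: short response with busy, CRC check
     * and opcode index check, no data phase. */
    unsigned int flags = SDHCI_CMD_RESP_SHORT_BUSY |
                         SDHCI_CMD_CRC | SDHCI_CMD_INDEX;

    /* prints: SDHCI_COMMAND = 0x261b (with the values above) */
    printf("SDHCI_COMMAND = 0x%04x\n", SDHCI_MAKE_CMD(38, flags));
    return 0;
}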

sdhci.h
struct sdhci_ops {
#ifdef CONFIG_MMC_SDHCI_IO_ACCESSORS
    u32     (*read_l)(struct sdhci_host *host, int reg);
    u16     (*read_w)(struct sdhci_host *host, int reg);
    u8      (*read_b)(struct sdhci_host *host, int reg);
    void        (*write_l)(struct sdhci_host *host, u32 val, int reg);
    void        (*write_w)(struct sdhci_host *host, u16 val, int reg);
    void        (*write_b)(struct sdhci_host *host, u8 val, int reg);
#endif

    void    (*set_clock)(struct sdhci_host *host, unsigned int clock);

    int     (*enable_dma)(struct sdhci_host *host);
    unsigned int    (*get_max_clock)(struct sdhci_host *host);
    unsigned int    (*get_min_clock)(struct sdhci_host *host);
    unsigned int    (*get_timeout_clock)(struct sdhci_host *host);
    int     (*platform_8bit_width)(struct sdhci_host *host,
                           int width);
    void (*platform_send_init_74_clocks)(struct sdhci_host *host,
                         u8 power_mode);
    unsigned int    (*get_ro)(struct sdhci_host *host);
};


static inline void sdhci_writew(struct sdhci_host *host, u16 val, int reg)
{
    if (unlikely(host->ops->write_w))
        host->ops->write_w(host, val, reg);
    else
        writew(val, host->ioaddr + reg);
}

Summary

...and that is the flow by which a TRIM command gets executed. End to end:

ioctl(FITRIM) -> ext3_trim_fs() -> ext3_trim_all_free() -> sb_issue_discard() -> blkdev_issue_discard() -> submit_bio() -> mmc_queue_thread() -> mmc_blk_issue_rq() -> mmc_blk_issue_discard_rq() -> mmc_erase() -> mmc_do_erase() -> mmc_wait_for_cmd() -> mmc_start_request() -> sdhci_request() -> sdhci_send_command() -> CMD38 to the card.

That covers what I wanted to find out. Happy ending!
