qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH] block/iscsi: use 16 byte CDBs only when necessa


From: ronnie sahlberg
Subject: Re: [Qemu-devel] [PATCH] block/iscsi: use 16 byte CDBs only when necessary
Date: Wed, 4 Jun 2014 07:54:29 -0700

On Wed, Jun 4, 2014 at 7:43 AM, Peter Lieven <address@hidden> wrote:
> Am 04.06.2014 16:00, schrieb ronnie sahlberg:
>> Looks good.
>>
>> As an alternative, you could do the 10 vs 16 decision based on the LBA
>> instead of the size of the device :
>>
>> -    if (use_16_for_ws) {
>> +   if (lba >= 0x100000000) {
>>         iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, 
>> lba,
>>                                             iscsilun->zeroblock,
>> iscsilun->block_size,
>>                                             nb_blocks, 0, !!(flags &
>> BDRV_REQ_MAY_UNMAP),
>>                                             0, 0, iscsi_co_generic_cb, 
>> &iTask);
>>     } else {
>>         iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, 
>> lba,
>>                                             iscsilun->zeroblock,
>> iscsilun->block_size,
>>                                             nb_blocks, 0, !!(flags &
>> BDRV_REQ_MAY_UNMAP),
>>                                             0, 0, iscsi_co_generic_cb, 
>> &iTask);
>>     }
>>
>> That would mean you get to use the 10 version of the cdb even for very
>> large devices (as long as the IO is for blocks at the beginning of the
>> device) and thus provide partial avoidance of this issue for those
>> large devices.
>
> I like that idea, however I fear that this would introduce additional bugs.

Fair enough. Basing it on device size is fine.

> - Using 10 Byte CDBs where the target might expect 16 Byte CDBs?!

You can still use 10 byte commands, even on very large devices. You
just can not access the whole device using the 10 byte commands.
(I recall some of the legacy unixen used to do this in the old days.
Using READ6 for low LBAs and then switching to READ10 when the LBA got
too big for READ6.)


> - What if lba + num_blocks > 2^32-1 ?

That is fine. As long as lba <= 2^32-1


This means that with 10 you can actually read/write 65534 blocks
beyond block 2^32 :-)
LBA==0xffffffff and NumBlocks=0xffff
will read blocks 0xffffffff all the way to 0x10000fffd


>
> The switch I added is like Linux does it - as Paolo pointed out earlier.
>
> In my case the number of >2TB Targets is not that big so I can work with
> the switch based on the capacity. Until the bug is fixed I just can't move
> those 2TB volumes around on the storage array.
>
> Peter
>
>>
>>
>> ronnie shalberg
>>
>>
>> On Wed, Jun 4, 2014 at 6:47 AM, Peter Lieven <address@hidden> wrote:
>>> this patch changes the driver to uses 16 Byte CDBs for
>>> READ/WRITE only if the target requires 64bit lba addressing.
>>>
>>> On one hand this saves 6 bytes in each PDU on the other
>>> hand it seems that 10 Byte CDBs seems to be much better
>>> supported and tested as a recent issue I had with a
>>> major storage supplier lined out.
>>>
>>> For WRITESAME the logic is a bit more tricky as WRITESAME10
>>> with UNMAP was added really late. Thus a fallback to WRITESAME16
>>> is possible if it supports UNMAP and WRITESAME10 not.
>>>
>>> Signed-off-by: Peter Lieven <address@hidden>
>>> ---
>>>  block/iscsi.c |   58 
>>> +++++++++++++++++++++++++++++++++++++++------------------
>>>  1 file changed, 40 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/block/iscsi.c b/block/iscsi.c
>>> index d241e83..019b324 100644
>>> --- a/block/iscsi.c
>>> +++ b/block/iscsi.c
>>> @@ -65,6 +65,7 @@ typedef struct IscsiLun {
>>>      unsigned char *zeroblock;
>>>      unsigned long *allocationmap;
>>>      int cluster_sectors;
>>> +    bool use_16_for_rw;
>>>  } IscsiLun;
>>>
>>>  typedef struct IscsiTask {
>>> @@ -368,10 +369,17 @@ static int coroutine_fn 
>>> iscsi_co_writev(BlockDriverState *bs,
>>>      num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
>>>      iscsi_co_init_iscsitask(iscsilun, &iTask);
>>>  retry:
>>> -    iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
>>> -                                    data, num_sectors * 
>>> iscsilun->block_size,
>>> -                                    iscsilun->block_size, 0, 0, 0, 0, 0,
>>> -                                    iscsi_co_generic_cb, &iTask);
>>> +    if (iscsilun->use_16_for_rw) {
>>> +        iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, 
>>> lba,
>>> +                                        data, num_sectors * 
>>> iscsilun->block_size,
>>> +                                        iscsilun->block_size, 0, 0, 0, 0, 
>>> 0,
>>> +                                        iscsi_co_generic_cb, &iTask);
>>> +    } else {
>>> +        iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, 
>>> lba,
>>> +                                        data, num_sectors * 
>>> iscsilun->block_size,
>>> +                                        iscsilun->block_size, 0, 0, 0, 0, 
>>> 0,
>>> +                                        iscsi_co_generic_cb, &iTask);
>>> +    }
>>>      if (iTask.task == NULL) {
>>>          g_free(buf);
>>>          return -ENOMEM;
>>> @@ -545,20 +553,17 @@ static int coroutine_fn 
>>> iscsi_co_readv(BlockDriverState *bs,
>>>
>>>      iscsi_co_init_iscsitask(iscsilun, &iTask);
>>>  retry:
>>> -    switch (iscsilun->type) {
>>> -    case TYPE_DISK:
>>> +    if (iscsilun->use_16_for_rw) {
>>>          iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
>>>                                         num_sectors * iscsilun->block_size,
>>>                                         iscsilun->block_size, 0, 0, 0, 0, 0,
>>>                                         iscsi_co_generic_cb, &iTask);
>>> -        break;
>>> -    default:
>>> +    } else {
>>>          iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
>>>                                         num_sectors * iscsilun->block_size,
>>>                                         iscsilun->block_size,
>>>                                         0, 0, 0, 0, 0,
>>>                                         iscsi_co_generic_cb, &iTask);
>>> -        break;
>>>      }
>>>      if (iTask.task == NULL) {
>>>          return -ENOMEM;
>>> @@ -864,19 +869,27 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState 
>>> *bs, int64_t sector_num,
>>>      struct IscsiTask iTask;
>>>      uint64_t lba;
>>>      uint32_t nb_blocks;
>>> +    bool use_16_for_ws = iscsilun->use_16_for_rw;
>>>
>>>      if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
>>>          return -EINVAL;
>>>      }
>>>
>>> -    if ((flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->lbp.lbpws) {
>>> -        /* WRITE SAME with UNMAP is not supported by the target,
>>> -         * fall back and try WRITE SAME without UNMAP */
>>> -        flags &= ~BDRV_REQ_MAY_UNMAP;
>>> +    if (flags & BDRV_REQ_MAY_UNMAP) {
>>> +        if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
>>> +            /* WRITESAME10 with UNMAP is unsupported try WRITESAME16 */
>>> +            use_16_for_ws = true;
>>> +        }
>>> +        if (use_16_for_ws && !iscsilun->lbp.lbpws) {
>>> +            /* WRITESAME16 with UNMAP is not supported by the target,
>>> +             * fall back and try WRITESAME10/16 without UNMAP */
>>> +            flags &= ~BDRV_REQ_MAY_UNMAP;
>>> +            use_16_for_ws = iscsilun->use_16_for_rw;
>>> +        }
>>>      }
>>>
>>>      if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
>>> -        /* WRITE SAME without UNMAP is not supported by the target */
>>> +        /* WRITESAME without UNMAP is not supported by the target */
>>>          return -ENOTSUP;
>>>      }
>>>
>>> @@ -889,10 +902,18 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState 
>>> *bs, int64_t sector_num,
>>>
>>>      iscsi_co_init_iscsitask(iscsilun, &iTask);
>>>  retry:
>>> -    if (iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
>>> -                               iscsilun->zeroblock, iscsilun->block_size,
>>> -                               nb_blocks, 0, !!(flags & 
>>> BDRV_REQ_MAY_UNMAP),
>>> -                               0, 0, iscsi_co_generic_cb, &iTask) == NULL) 
>>> {
>>> +    if (use_16_for_ws) {
>>> +        iTask.task = iscsi_writesame16_task(iscsilun->iscsi, 
>>> iscsilun->lun, lba,
>>> +                                            iscsilun->zeroblock, 
>>> iscsilun->block_size,
>>> +                                            nb_blocks, 0, !!(flags & 
>>> BDRV_REQ_MAY_UNMAP),
>>> +                                            0, 0, iscsi_co_generic_cb, 
>>> &iTask);
>>> +    } else {
>>> +        iTask.task = iscsi_writesame10_task(iscsilun->iscsi, 
>>> iscsilun->lun, lba,
>>> +                                            iscsilun->zeroblock, 
>>> iscsilun->block_size,
>>> +                                            nb_blocks, 0, !!(flags & 
>>> BDRV_REQ_MAY_UNMAP),
>>> +                                            0, 0, iscsi_co_generic_cb, 
>>> &iTask);
>>> +    }
>>> +    if (iTask.task == NULL) {
>>>          return -ENOMEM;
>>>      }
>>>
>>> @@ -1087,6 +1108,7 @@ static void iscsi_readcapacity_sync(IscsiLun 
>>> *iscsilun, Error **errp)
>>>                      iscsilun->num_blocks = rc16->returned_lba + 1;
>>>                      iscsilun->lbpme = rc16->lbpme;
>>>                      iscsilun->lbprz = rc16->lbprz;
>>> +                    iscsilun->use_16_for_rw = (rc16->returned_lba > 
>>> 0xffffffff);
>>>                  }
>>>              }
>>>              break;
>>> --
>>> 1.7.9.5
>>>
>



reply via email to

[Prev in Thread] Current Thread [Next in Thread]