From: Nicholas Piggin
Subject: Re: [PATCH RESEND 11/15] ppc: spapr: Implement nested PAPR hcall - H_GUEST_[GET|SET]_STATE
Date: Thu, 07 Sep 2023 13:30:31 +1000

On Wed Sep 6, 2023 at 2:33 PM AEST, Harsh Prateek Bora wrote:
> L1 can request to get/set the state of any of the supported Guest State
> Buffer (GSB) elements using the h_guest_[get|set]_state hcalls.
> These hcalls need to do the necessary validation checks for each
> get/set request based on the flags passed and the operation supported.
>
> Signed-off-by: Michael Neuling <mikey@neuling.org>
> Signed-off-by: Harsh Prateek Bora <harshpb@linux.ibm.com>
> ---
>  hw/ppc/spapr_nested.c         | 267 ++++++++++++++++++++++++++++++++++
>  include/hw/ppc/spapr_nested.h |  22 +++
>  2 files changed, 289 insertions(+)
>
> diff --git a/hw/ppc/spapr_nested.c b/hw/ppc/spapr_nested.c
> index 6fbb1bcb02..498e7286fa 100644
> --- a/hw/ppc/spapr_nested.c
> +++ b/hw/ppc/spapr_nested.c
> @@ -897,6 +897,138 @@ void init_nested(void)
>      }
>  }
>  
> +static struct guest_state_element *guest_state_element_next(
> +    struct guest_state_element *element,
> +    int64_t *len,
> +    int64_t *num_elements)
> +{
> +    uint16_t size;
> +
> +    /* size is of element->value[] only. Not whole guest_state_element */
> +    size = be16_to_cpu(element->size);
> +
> +    if (len) {
> +        *len -= size + offsetof(struct guest_state_element, value);
> +    }
> +
> +    if (num_elements) {
> +        *num_elements -= 1;
> +    }
> +
> +    return (struct guest_state_element *)(element->value + size);
> +}
> +
> +static
> +struct guest_state_element_type *guest_state_element_type_find(uint16_t id)
> +{
> +    int i;
> +
> +    for (i = 0; i < ARRAY_SIZE(guest_state_element_types); i++)
> +        if (id == guest_state_element_types[i].id) {
> +            return &guest_state_element_types[i];
> +        }
> +
> +    return NULL;
> +}
> +
> +static void print_element(struct guest_state_element *element,
> +                          struct guest_state_request *gsr)
> +{
> +    printf("id:0x%04x size:0x%04x %s ",
> +           be16_to_cpu(element->id), be16_to_cpu(element->size),
> +           gsr->flags & GUEST_STATE_REQUEST_SET ? "set" : "get");
> +    printf("buf:0x%016lx ...\n", be64_to_cpu(*(uint64_t *)element->value));

No printfs. These could be GUEST_ERROR qemu logs if anything; if you
keep them, make sure they're relatively well-formed messages, i.e.,
something from which a Linux/KVM developer could understand what went
wrong. That means no __func__, which is internal to QEMU; use
"H_GUEST_GET_STATE" etc. instead. Ditto for all the rest of the printfs.
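
Something like this, say (untested sketch; qemu_log_mask() and
LOG_GUEST_ERROR are the existing interfaces from include/qemu/log.h,
and the message text is only an example):

    #include "qemu/log.h"

    /* instead of: printf("%s: Element ID %04x unknown\n", __func__, id); */
    qemu_log_mask(LOG_GUEST_ERROR,
                  "H_GUEST_GET_STATE: unknown element ID 0x%04x\n", id);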

> +}
> +
> +static bool guest_state_request_check(struct guest_state_request *gsr)
> +{
> +    int64_t num_elements, len = gsr->len;
> +    struct guest_state_buffer *gsb = gsr->gsb;
> +    struct guest_state_element *element;
> +    struct guest_state_element_type *type;
> +    uint16_t id, size;
> +
> +    /* gsb->num_elements = 0 == 32 bits long */
> +    assert(len >= 4);

I haven't looked closely, but can the guest crash the host with
malformed requests here?

This API is pretty complicated; make sure you sanitize all inputs
carefully, as early as possible, and without too deep a call and
control-flow chain from the API entry point.
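
E.g. in guest_state_request_check(), something along these lines rather
than asserting on guest-supplied lengths (untested sketch, just to show
the shape of it):

    /* Fail the request instead of asserting on L1-controlled input */
    if (len < (int64_t)sizeof(gsb->num_elements)) {
        return false;
    }
    ...
    while (num_elements) {
        /* Make sure the element header fits before dereferencing it */
        if (len < (int64_t)sizeof(*element)) {
            return false;
        }
        id = be16_to_cpu(element->id);
        size = be16_to_cpu(element->size);
        ...
    }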


> +
> +    num_elements = be32_to_cpu(gsb->num_elements);
> +    element = gsb->elements;
> +    len -= sizeof(gsb->num_elements);
> +
> +    /* Walk the buffer to validate the length */
> +    while (num_elements) {
> +
> +        id = be16_to_cpu(element->id);
> +        size = be16_to_cpu(element->size);
> +
> +        if (false) {
> +            print_element(element, gsr);
> +        }
> +        /* buffer size too small */
> +        if (len < 0) {
> +            return false;
> +        }
> +
> +        type = guest_state_element_type_find(id);
> +        if (!type) {
> +            printf("%s: Element ID %04x unknown\n", __func__, id);
> +            print_element(element, gsr);
> +            return false;
> +        }
> +
> +        if (id == GSB_HV_VCPU_IGNORED_ID) {
> +            goto next_element;
> +        }
> +
> +        if (size != type->size) {
> +            printf("%s: Size mismatch. Element ID:%04x. Size Exp:%i 
> Got:%i\n",
> +                   __func__, id, type->size, size);
> +            print_element(element, gsr);
> +            return false;
> +        }
> +
> +        if ((type->flags & GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY) &&
> +            (gsr->flags & GUEST_STATE_REQUEST_SET)) {
> +            printf("%s: trying to set a read-only Element ID:%04x.\n",
> +                   __func__, id);
> +            return false;
> +        }
> +
> +        if (type->flags & GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE) {
> +            /* guest wide element type */
> +            if (!(gsr->flags & GUEST_STATE_REQUEST_GUEST_WIDE)) {
> +                printf("%s: trying to set a guest wide Element ID:%04x.\n",
> +                       __func__, id);
> +                return false;
> +            }
> +        } else {
> +            /* thread wide element type */
> +            if (gsr->flags & GUEST_STATE_REQUEST_GUEST_WIDE) {
> +                printf("%s: trying to set a thread wide Element ID:%04x.\n",
> +                       __func__, id);
> +                return false;
> +            }
> +        }
> +next_element:
> +        element = guest_state_element_next(element, &len, &num_elements);
> +
> +    }
> +    return true;
> +}
> +
> +static bool is_gsr_invalid(struct guest_state_request *gsr,
> +                                   struct guest_state_element *element,
> +                                   struct guest_state_element_type *type)
> +{
> +    if ((gsr->flags & GUEST_STATE_REQUEST_SET) &&
> +        (*(uint64_t *)(element->value) & ~(type->mask))) {
> +        print_element(element, gsr);
> +        printf("L1 can't set reserved bits (allowed mask: 0x%08lx)\n",
> +               type->mask);
> +        return true;
> +    }
> +    return false;
> +}
>  
>  static target_ulong h_guest_get_capabilities(PowerPCCPU *cpu,
>                                               SpaprMachineState *spapr,
> @@ -1108,6 +1240,139 @@ static target_ulong h_guest_create_vcpu(PowerPCCPU *cpu,
>      return H_SUCCESS;
>  }
>  
> +static target_ulong getset_state(SpaprMachineStateNestedGuest *guest,
> +                                 uint64_t vcpuid,
> +                                 struct guest_state_request *gsr)
> +{
> +    void *ptr;
> +    uint16_t id;
> +    struct guest_state_element *element;
> +    struct guest_state_element_type *type;
> +    int64_t lenleft, num_elements;
> +
> +    lenleft = gsr->len;
> +
> +    if (!guest_state_request_check(gsr)) {
> +        return H_P3;
> +    }
> +
> +    num_elements = be32_to_cpu(gsr->gsb->num_elements);
> +    element = gsr->gsb->elements;
> +    /* Process the elements */
> +    while (num_elements) {
> +        type = NULL;
> +        /* Debug print before doing anything */
> +        if (false) {
> +            print_element(element, gsr);
> +        }
> +
> +        id = be16_to_cpu(element->id);
> +        if (id == GSB_HV_VCPU_IGNORED_ID) {
> +            goto next_element;
> +        }
> +
> +        type = guest_state_element_type_find(id);
> +        assert(type);
> +
> +        /* Get pointer to guest data to get/set */
> +        if (type->location && type->copy) {
> +            ptr = type->location(guest, vcpuid);
> +            assert(ptr);
> +            if (!~(type->mask) && is_gsr_invalid(gsr, element, type)) {
> +                return H_INVALID_ELEMENT_VALUE;
> +            }
> +            type->copy(ptr + type->offset, element->value,
> +                       gsr->flags & GUEST_STATE_REQUEST_SET ? true : false);
> +        }
> +
> +next_element:
> +        element = guest_state_element_next(element, &lenleft, &num_elements);
> +    }
> +
> +    return H_SUCCESS;
> +}
> +
> +static target_ulong map_and_getset_state(PowerPCCPU *cpu,
> +                                         SpaprMachineStateNestedGuest *guest,
> +                                         uint64_t vcpuid,
> +                                         struct guest_state_request *gsr)
> +{
> +    target_ulong rc;
> +    int64_t lenleft, len;
> +    bool is_write;
> +
> +    assert(gsr->len < (1024 * 1024)); /* sanity check */

Use a #define for this, and make sure the guest can't crash the host.
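
E.g. (sketch only; the name and the exact return code are placeholders):

    #define GUEST_STATE_BUFFER_MAX_SIZE (1024 * 1024)

    if (gsr->len > GUEST_STATE_BUFFER_MAX_SIZE) {
        return H_P3;   /* fail the hcall rather than assert on L1 input */
    }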
> +
> +    lenleft = len = gsr->len;

Why lenleft? Can't you just check gsr->len like you do gsr->gsb?

> +    gsr->gsb = address_space_map(CPU(cpu)->as, gsr->buf, (uint64_t *)&len,
> +                                 false, MEMTXATTRS_UNSPECIFIED);

So it's a read-only memory access to gsr->buf? Even for the set?
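
I would have expected the direction to depend on the request, something
like (sketch only):

    /* H_GUEST_GET_STATE writes element values back into the L1 buffer,
     * so the mapping must be writable; a set only reads it. */
    bool map_is_write = !(gsr->flags & GUEST_STATE_REQUEST_SET);

    gsr->gsb = address_space_map(CPU(cpu)->as, gsr->buf, (uint64_t *)&len,
                                 map_is_write, MEMTXATTRS_UNSPECIFIED);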

> +    if (!gsr->gsb) {
> +        rc = H_P3;
> +        goto out1;
> +    }
> +
> +    if (len != lenleft) {
> +        rc = H_P3;
> +        goto out1;
> +    }
> +
> +    rc = getset_state(guest, vcpuid, gsr);
> +
> +out1:
> +    is_write = (rc == H_SUCCESS) ? len : 0;
> +    address_space_unmap(CPU(cpu)->as, gsr->gsb, len, is_write, false);

I don't think this is right; you want to specify the length of memory
you actually accessed, even if there was some error.

Over-specifying would, I think, be okay, so just use len.
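
I.e., roughly (sketch; the last argument to address_space_unmap() is
the access length):

    address_space_unmap(CPU(cpu)->as, gsr->gsb, len, is_write, len);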


> +    return rc;
> +}
> +
> +static target_ulong h_guest_getset_state(PowerPCCPU *cpu,
> +                                         SpaprMachineState *spapr,
> +                                         target_ulong *args,
> +                                         bool set)
> +{
> +    target_ulong flags = args[0];
> +    target_ulong lpid = args[1];
> +    target_ulong vcpuid = args[2];
> +    target_ulong buf = args[3];
> +    target_ulong buflen = args[4];
> +    struct guest_state_request gsr;
> +    SpaprMachineStateNestedGuest *guest;
> +
> +    guest = spapr_get_nested_guest(spapr, lpid);
> +    if (!guest) {
> +        return H_P2;
> +    }
> +    gsr.buf = buf;
> +    gsr.len = buflen;
> +    gsr.flags = 0;

Not a big fan of packaging up some args into a structure, especially if
it's pretty static to a file and doesn't need to be carried around with
some data. Do you even need this gsr thing?
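
I.e. could the helpers not just take the arguments directly, something
like (sketch):

    static target_ulong map_and_getset_state(PowerPCCPU *cpu,
                                             SpaprMachineStateNestedGuest *guest,
                                             uint64_t vcpuid, target_ulong buf,
                                             target_ulong buflen, uint64_t flags);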

> +    if (flags & H_GUEST_GETSET_STATE_FLAG_GUEST_WIDE) {
> +        gsr.flags |= GUEST_STATE_REQUEST_GUEST_WIDE;
> +    }
> +    if (flags & ~H_GUEST_GETSET_STATE_FLAG_GUEST_WIDE) {
> +        return H_PARAMETER; /* flag not supported yet */
> +    }
> +
> +    if (set) {
> +        gsr.flags |= GUEST_STATE_REQUEST_SET;
> +    }
> +    return map_and_getset_state(cpu, guest, vcpuid, &gsr);
> +}
> +
> +static target_ulong h_guest_set_state(PowerPCCPU *cpu,
> +                                      SpaprMachineState *spapr,
> +                                      target_ulong opcode,
> +                                      target_ulong *args)
> +{
> +    return h_guest_getset_state(cpu, spapr, args, true);
> +}
> +
> +static target_ulong h_guest_get_state(PowerPCCPU *cpu,
> +                                      SpaprMachineState *spapr,
> +                                      target_ulong opcode,
> +                                      target_ulong *args)
> +{
> +    return h_guest_getset_state(cpu, spapr, args, false);
> +}
> +
>  void spapr_register_nested(void)
>  {
>      spapr_register_hypercall(KVMPPC_H_SET_PARTITION_TABLE, h_set_ptbl);
> @@ -1122,6 +1387,8 @@ void spapr_register_nested_phyp(void)
>      spapr_register_hypercall(H_GUEST_SET_CAPABILITIES, h_guest_set_capabilities);
>      spapr_register_hypercall(H_GUEST_CREATE          , h_guest_create);
>      spapr_register_hypercall(H_GUEST_CREATE_VCPU     , h_guest_create_vcpu);
> +    spapr_register_hypercall(H_GUEST_SET_STATE       , h_guest_set_state);
> +    spapr_register_hypercall(H_GUEST_GET_STATE       , h_guest_get_state);
>  }
>  
>  #else
> diff --git a/include/hw/ppc/spapr_nested.h b/include/hw/ppc/spapr_nested.h
> index 3c0d6a486e..eaee624b87 100644
> --- a/include/hw/ppc/spapr_nested.h
> +++ b/include/hw/ppc/spapr_nested.h
> @@ -206,6 +206,9 @@
>  #define HVMASK_MSR            0xEBFFFFFFFFBFEFFF
>  #define HVMASK_HDEXCR         0x00000000FFFFFFFF
>  #define HVMASK_TB_OFFSET      0x000000FFFFFFFFFF
> +#define H_GUEST_GETSET_STATE_FLAG_GUEST_WIDE 0x8000000000000000 /* BE in GSB */
> +#define GUEST_STATE_REQUEST_GUEST_WIDE       0x1
> +#define GUEST_STATE_REQUEST_SET              0x2
>  
>  #define GUEST_STATE_ELEMENT(i, sz, s, f, ptr, c) { \
>      .id = (i),                                     \
> @@ -336,6 +339,25 @@ struct guest_state_element_type {
>      uint64_t mask;
>  };
>  
> +struct guest_state_element {
> +    uint16_t id;   /* Big Endian */
> +    uint16_t size; /* Big Endian */
> +    uint8_t value[]; /* Big Endian (based on size above) */
> +} QEMU_PACKED;
> +
> +struct guest_state_buffer {
> +    uint32_t num_elements; /* Big Endian */
> +    struct guest_state_element elements[];
> +} QEMU_PACKED;

I think it's probably enough to add one comment saying the PAPR
API numbers are all in BE format. This is actually expected of PAPR
so it goes without saying really, but the nested HV API actually had
some things in guest endian format so it's worth calling out.

Actually maybe single out the nested HV structures as different. I
don't know if the upstream code actually handles endian properly...
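
Maybe just something like this above the struct definitions (wording is
only a suggestion):

    /*
     * All Guest State Buffer contents (element IDs, sizes and values)
     * are big-endian, as PAPR specifies. This differs from the original
     * nested HV API, where some state was in guest endianness.
     */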

Thanks,
Nick

> +
> +/* Actual buffer plus some metadata about the request */
> +struct guest_state_request {
> +    struct guest_state_buffer *gsb;
> +    int64_t buf;
> +    int64_t len;
> +    uint16_t flags;
> +};
> +
>  /*
>   * Register state for entering a nested guest with H_ENTER_NESTED.
>   * New member must be added at the end.



