Max
Max

Reputation: 615

How to decompress an lz4-compressed OS image and write it to disk

I am trying to fetch an OS image with pycurl and write the decompressed data to disk. With gzip this is straightforward, but with the lz4 format I run into issues: write_lz4(buf) appears to decompress the data and write it to disk, but when I then try to resize the partition I get an error:

entries is 0 bytes, but this program supports only 128-byte entries. Adjusting accordingly, but partition table may be garbage. Warning: Partition table header claims that the size of partition table entries is 0 bytes, but this program supports only 128-byte entries. Adjusting accordingly, but partition table may be garbage. Creating new GPT entries in memory. The operation has completed successfully. Error: Partition doesn't exist

I could also get it to work with io.BytesIO:

# Download the complete lz4 image into memory, then decompress it in one call.
if url.endswith('.lz4'):
    with io.BytesIO() as compressed:
        curl.setopt(pycurl.WRITEDATA, compressed)
        curl.perform()
        # getvalue() yields the whole buffer regardless of the stream position.
        disk.write(lz4.frame.decompress(compressed.getvalue()))

But it seems this step is unnecessary. I tried the direct approach but it didn't work. Here is the code:

def write_to_disk(self, url, dev, proxy=None):
    """Download the image at ``url`` with pycurl and write it, decompressed, to ``dev``.

    The decompression callback is chosen from the URL suffix (``.lz4`` or
    ``.gz``). Returns ``False`` when pycurl raises ``pycurl.error`` and
    ``True`` otherwise.
    """

    # Remember the original size, but only when the target is a regular file.
    if os.path.isfile(dev):
        size = os.path.getsize(dev)

    with open(os.path.realpath(dev), 'wb') as disk:
        disk.seek(0)

        # cURL write callback: feeds each received chunk through the shared
        # zlib decompress object ``d`` (created below in the '.gz' branch).
        def write_gz(buf):
            disk.write(d.decompress(buf))

        # cURL write callback: calls lz4.decompress() on every chunk by itself.
        # NOTE(review): no decompressor state is carried between calls, so each
        # chunk is treated as a complete stream -- presumably the cause of the
        # corrupt output described above; confirm against the lz4 documentation.
        def write_lz4(buf):
            disk.write(lz4.decompress(buf))

        try:
            curl = pycurl.Curl()
            curl.setopt(pycurl.URL, url)
            # NOTE(review): the default ``proxy=None`` passes this test, so
            # PROXY is set to None unless the caller passes False explicitly.
            if proxy is not False:
                curl.setopt(pycurl.PROXY, proxy)
            curl.setopt(pycurl.BUFFERSIZE, 1024)
            if url.endswith('.lz4'):
                curl.setopt(pycurl.WRITEFUNCTION, write_lz4)
            elif url.endswith('.gz'):
                # MAX_WBITS | 32 enables automatic zlib/gzip header detection.
                d = zlib.decompressobj(zlib.MAX_WBITS | 32)
                curl.setopt(pycurl.WRITEFUNCTION, write_gz)
            # For any other suffix no write callback is installed before this call.
            curl.perform()

        except pycurl.error:
            return False

    # NOTE(review): this runs after the ``with`` block has already closed
    # ``disk``; it looks intended to restore the original file size.
    if os.path.isfile(dev):
        disk.seek(size - 1)
        disk.write(b"\0")

    return True

Thanks

Upvotes: 0

Views: 155

Answers (1)

Max
Max

Reputation: 615

I ended up implementing the following solution:

def write_to_disk(self, url, dev, proxy=None):
    """Fetch a compressed OS image, decompress it, and write it to a block device.

    The format is chosen from the URL suffix. ``.gz``, ``.lz4`` and ``.raw``
    images are streamed to the device chunk by chunk as they arrive, so the
    image never has to fit in memory. ``.zst`` images are downloaded
    completely and decompressed with a single ``zstd.decompress()`` call.

    Args:
        url: Location of the image; must end in ``.gz``, ``.zst``, ``.lz4``
            or ``.raw``.
        dev: Path of the target device; symlinks are resolved first.
        proxy: Proxy handed to cURL. ``None`` and ``False`` leave cURL's
            proxy setting untouched.

    Returns:
        ``True`` when the transfer completed, ``False`` when ``dev`` is not a
        block device, the URL suffix is unsupported, or pycurl raised
        ``pycurl.error``.
    """
    real_dev = os.path.realpath(dev)
    if not self.is_block_device(real_dev):
        print(f"{dev} is not a block device")
        return False

    # Reject unknown formats up front so that an unsupported URL is reported
    # as a failure instead of a successful transfer that wrote nothing.
    if not url.endswith(('.gz', '.zst', '.lz4', '.raw')):
        print(f"Unsupported image format: {url}")
        return False

    with open(real_dev, 'wb') as disk:
        try:
            curl = pycurl.Curl()
            try:
                curl.setopt(pycurl.URL, url)
                # Only forward a real proxy value; None is the "not given"
                # default and must not be handed to setopt().
                if proxy is not None and proxy is not False:
                    curl.setopt(pycurl.PROXY, proxy)
                curl.setopt(pycurl.BUFFERSIZE, 524288)

                if url.endswith('.gz'):
                    # MAX_WBITS | 32 auto-detects zlib and gzip headers.
                    d = zlib.decompressobj(zlib.MAX_WBITS | 32)

                    # Callbacks return None on purpose: the decompressed
                    # length differs from the number of bytes cURL delivered.
                    def write_gz(buf):
                        disk.write(d.decompress(buf))

                    curl.setopt(pycurl.WRITEFUNCTION, write_gz)
                    curl.perform()
                    # Emit whatever the decompressor still holds back.
                    disk.write(d.flush())

                elif url.endswith('.lz4'):
                    # A frame decompressor keeps its state between calls, so
                    # chunks that end in the middle of an lz4 block are fine.
                    decompressor = lz4.frame.LZ4FrameDecompressor()

                    def write_lz4(buf):
                        disk.write(decompressor.decompress(buf))

                    curl.setopt(pycurl.WRITEFUNCTION, write_lz4)
                    curl.perform()

                elif url.endswith('.zst'):
                    # Collect the whole download, then decompress in one call.
                    buffer = bytearray()
                    curl.setopt(pycurl.WRITEFUNCTION, buffer.extend)
                    curl.perform()
                    disk.write(zstd.decompress(bytes(buffer)))

                else:  # '.raw'
                    def write_raw(buf):
                        disk.write(buf)

                    curl.setopt(pycurl.WRITEFUNCTION, write_raw)
                    curl.perform()
            finally:
                # Release the cURL handle on success and on failure alike.
                curl.close()

        except pycurl.error as e:
            print("PyCURL error:", e)
            return False

    return True

EDIT: Here is another variant that handles qcow2 images, which is quite fast:

def write_to_disk(self, url, dev, proxy=None):
    """Fetch an OS image and write it to a block device.

    ``.qcow2`` images are downloaded to a temporary file and converted to raw
    format by ``qemu-img convert``, which writes straight to the device.
    ``.gz`` images are decompressed and streamed to the device as they arrive.

    Args:
        url: Location of the image; must end in ``.qcow2`` or ``.gz``.
        dev: Path of the target device; symlinks are resolved first.
        proxy: Proxy handed to cURL; ignored when falsy.

    Returns:
        ``True`` on success, ``False`` when ``dev`` is not a block device,
        the URL suffix is unsupported, pycurl raised ``pycurl.error``, or
        ``qemu-img`` exited with a non-zero status.
    """
    real_dev = os.path.realpath(dev)
    if not self.is_block_device(real_dev):
        print(f"{dev} is not a block device")
        return False

    # Reject unknown formats up front so that an unsupported URL is reported
    # as a failure instead of a successful transfer that wrote nothing.
    if not url.endswith(('.qcow2', '.gz')):
        print(f"Unsupported image format: {url}")
        return False

    with open(real_dev, 'wb') as disk:
        try:
            curl = pycurl.Curl()
            try:
                curl.setopt(pycurl.URL, url)
                if proxy:
                    curl.setopt(pycurl.PROXY, proxy)
                curl.setopt(pycurl.BUFFERSIZE, 524288)

                if url.endswith('.qcow2'):
                    # Stage the qcow2 image in a temporary file.
                    with tempfile.NamedTemporaryFile() as temp_qcow2:
                        curl.setopt(pycurl.WRITEDATA, temp_qcow2)
                        curl.perform()
                        # qemu-img opens the file by name, so data still in
                        # Python's write buffer must reach the file first.
                        temp_qcow2.flush()
                        # Convert to raw and write directly to the device.
                        subprocess.run(
                            ["qemu-img", "convert", "-f", "qcow2", "-O", "raw",
                             temp_qcow2.name, real_dev],
                            check=True,
                        )

                elif url.endswith('.gz'):
                    # MAX_WBITS | 32 auto-detects zlib and gzip headers.
                    d = zlib.decompressobj(zlib.MAX_WBITS | 32)

                    # Returns None on purpose: the decompressed length
                    # differs from the number of bytes cURL delivered.
                    def write_gz(buf):
                        disk.write(d.decompress(buf))

                    curl.setopt(pycurl.WRITEFUNCTION, write_gz)
                    curl.perform()
                    # Emit whatever the decompressor still holds back.
                    disk.write(d.flush())
            finally:
                # Release the cURL handle on success and on failure alike.
                curl.close()

        except pycurl.error as e:
            print(f"Error downloading the image: {e}")
            return False
        except subprocess.CalledProcessError as e:
            print(f"Error during conversion or writing to disk: {e}")
            return False

    return True

Upvotes: 0

Related Questions