netikras
netikras

Reputation: 512

Util to extract bits from byte array into a new byte[]

I'm trying to build a utility class to make bitwise manipulation and conversions more readable. Currently I'm stuck at building a method to extract bits from a byte array and form a new byte[] from them. Needless to say I'm not very fluent in bitwise operations.

I believe it could probably be achieved using BitSet, but there would be too many conversions and the implementation would be Java-speciffic. It would be nice to have a clear algorithm that could be easily ported to other languages later on.

So far I've got that far:

    public static byte[] toBytes(int offset /*full bytes*/, int bitsOffset /*bytes + bits*/, int bitsCount, byte... bytes) {
        int bytesCount = bitsCount / 8;
        int paddingBits = bitsCount % 8;
        int partialBits = 8 - paddingBits;

        if (paddingBits > 0) {
            bytesCount++;
        }

        byte[] data = new byte[bytesCount];

        return data;
    }

I've commented the above out and replaced it temporary with

    public static byte[] toBytes(int offset, int bitsOffset, int bitsCount, byte... bytes) {
        int firstBitIndex = (offset * 8) + bitsOffset;
        return new BigInteger(new BigInteger(1, bytes).toString(2).substring(firstBitIndex, firstBitIndex + bitsCount), 2).toByteArray();
    }

However I'd still like to have a proper implementation with as little overhead as possible and not speciffic to Java (no java-speciffic tools used, like BitSet)

This is a hint what I'd expect it to do

   /**
     * [0000 0110   1111 0010] = toBytes(1, 4, 12, [xxxx xxxx   xxxx 0110   1111 0010   xxxx xxxx])
     * [0000 0110   1111 0010] = toBytes(1, 5, 12, [xxxx xxxx   xxxx x011   0111 1001   0xxx xxxx])
     * [0000 0110   1111 0010] = toBytes(1, 6, 12, [xxxx xxxx   xxxx xx01   1011 1100   10xx xxxx])
     */

And here are some unittests

public class ByteUtilTest {

    @Test
    public void toBytes_sameByte() {
        byte[] result = ByteUtil.toBytes(1, 4, 3,
                toByte("11111111"),
                toByte("11110111"),
                toByte("11111111"),
                toByte("11111111"));

        assertEquals(toBinaryString(toByte("00000011")), toBinaryString(result));
    }

    @Test
    public void toBytes_sameByte_full() {
        byte[] result = ByteUtil.toBytes(1, 0, 8,
                toByte("11111111"),
                toByte("01110101"),
                toByte("11111111"),
                toByte("11111111"));

        assertEquals(toBinaryString(toByte("01110101")), toBinaryString(result));
    }

    @Test
    public void toBytes_sameByte_noneWithoutOffset() {
        byte[] result = ByteUtil.toBytes(1, 0, 0,
                toByte("11111111"),
                toByte("01110101"),
                toByte("11111111"),
                toByte("11111111"));

        assertEquals(0, result.length);
    }

    @Test
    public void toBytes_sameByte_noneWithOffset() {
        byte[] result = ByteUtil.toBytes(1, 3, 0,
                toByte("11111111"),
                toByte("01110101"),
                toByte("11111111"),
                toByte("11111111"));

        assertEquals(0, result.length);
    }

    @Test
    public void toBytes_twoBytes_resultWithTwoBytes() {
        byte[] result = ByteUtil.toBytes(1, 2, 11,
                toByte("11111111"),
                toByte("01110101"),
                toByte("10011111"),
                toByte("11111111"));

        assertEquals(toBinaryString(toByte("00000110"), toByte("10110011")), toBinaryString(result));
    }

    @Test
    public void toBytes_twoBytes_resultWithOneByte() {
        byte[] result = ByteUtil.toBytes(1, 2, 7,
                toByte("11111111"),
                toByte("01110101"),
                toByte("10011111"),
                toByte("11111111"));

        assertEquals(toBinaryString(toByte("01101011")), toBinaryString(result));
    }

    @Test
    public void toBytes_twoBytes_firstFull() {
        byte[] result = ByteUtil.toBytes(1, 0, 11,
                toByte("11111111"),
                toByte("01110101"),
                toByte("10011111"),
                toByte("11111111"));

        assertEquals(toBinaryString(toByte("00000011"), toByte("10101100")), toBinaryString(result));
    }

    @Test
    public void toBytes_twoBytes_lastFull() {
        byte[] result = ByteUtil.toBytes(1, 5, 11,
                toByte("11111111"),
                toByte("01110101"),
                toByte("10011101"),
                toByte("11111111"));

        assertEquals(toBinaryString(toByte("00000101"), toByte("10011101")), toBinaryString(result));
    }

    @Test
    public void toBytes_twoBytes_bothFull() {
        byte[] result = ByteUtil.toBytes(1, 0, 16,
                toByte("11111111"),
                toByte("01110101"),
                toByte("10011101"),
                toByte("11111111"));

        assertEquals(toBinaryString(toByte("01110101"), toByte("10011101")), toBinaryString(result));
    }

    @Test
    public void toBytes_threeBytes() {
        byte[] result = ByteUtil.toBytes(1, 2, 19,
                toByte("11111111"),
                toByte("01110101"),
                toByte("10011101"),
                toByte("10111111"));

        assertEquals(
                toBinaryString(
                        toByte("00000110"),
                        toByte("10110011"),
                        toByte("10110111")),
                toBinaryString(result));
    }

    @Test
    public void toBytes_threeBytes_firstFull() {
        byte[] result = ByteUtil.toBytes(1, 0, 19,
                toByte("11111111"),
                toByte("01110101"),
                toByte("10011101"),
                toByte("10111111"));

        assertEquals(
                toBinaryString(
                        toByte("00000011"),
                        toByte("10101100"),
                        toByte("11101101")),
                toBinaryString(result));
    }

    @Test
    public void toBytes_threeBytes_lastFull() {
        byte[] result = ByteUtil.toBytes(1, 2, 22,
                toByte("11111111"),
                toByte("01110101"),
                toByte("10011101"),
                toByte("10111111"));

        assertEquals(
                toBinaryString(
                        toByte("00110101"),
                        toByte("10011101"),
                        toByte("10111111")),
                toBinaryString(result));
    }

    @Test
    public void toBytes_threeBytes_allFull() {
        byte[] result = ByteUtil.toBytes(1, 0, 24,
                toByte("11111111"),
                toByte("01110101"),
                toByte("10011101"),
                toByte("10111111"));

        assertEquals(
                toBinaryString(
                        toByte("01110101"),
                        toByte("10011101"),
                        toByte("10111111")),
                toBinaryString(result));
    }



    @Test
    public void toBytes_bitsOffset_4() {
        byte[] result = ByteUtil.toBytes(1, 4, 12,
                toByte("11111111"),
                toByte("11110110"),
                toByte("11110010"),
                toByte("11111111"));

        assertEquals(toBinaryString(toByte("00000110"), toByte("11110010")), toBinaryString(result));
    }

    @Test
    public void toBytes_bitsOffset_5() {
        byte[] result = ByteUtil.toBytes(1, 5, 12,
                toByte("11111111"),
                toByte("11111011"),
                toByte("01111001"),
                toByte("01111111"));

        assertEquals(toBinaryString(toByte("00000110"), toByte("11110010")), toBinaryString(result));
    }

    @Test
    public void toBytes_bitsOffset_6() {
        byte[] result = ByteUtil.toBytes(1, 6, 12,
                toByte("11111111"),
                toByte("11111101"),
                toByte("10111100"),
                toByte("10111111"));

        assertEquals(toBinaryString(toByte("00000110"), toByte("11110010")), toBinaryString(result));
    }

    private String toBinaryString(byte... data) {
        StringBuilder binaryStr = new StringBuilder();
        String value = Integer.toBinaryString(data[0]);
        if (value.length() > 8) value = value.substring(value.length() - 8);
        else if (value.length() < 8) value = String.format("%8s", value).replace(" ", "0");
        binaryStr.append(value);
        for (int i = 1; i < data.length; i++) {
            value = Integer.toBinaryString(data[i]);
            if (value.length() > 8) value = value.substring(value.length() - 8);
            else if (value.length() < 8) value = String.format("%8s", value).replace(" ", "0");
            binaryStr.append(" ").append(value);
        }

        return binaryStr.toString();
    }


    private String toString(byte[] data) {
        return Arrays.toString(data);
    }

    private byte toByte(String binary) {
        return (byte) Integer.parseInt(binary, 2);
    }
}

Upvotes: 1

Views: 684

Answers (1)

Socowi
Socowi

Reputation: 27225

So far I've got [...] byte[] data = new byte[bytesCount];

Unfortunately this approach will only work if your bit offset is a multiple of 8. In all other cases you have to divide every byte you intend to copy. The following drawing illustrates how to divide each byte and where to put the divided parts.

MSB = most significant bit
LSB = least significant bit

Algorithm illustrated

Implementing the algorithm illustrated above is a bit tricky because of a lot of corner cases. The following implementation passes all your tests and all of my tests. I used many variables to give all the calculations meaningful names in the hope that it will be easier to follow. You can shorten the implementation by eliminating some of these variables and calculate some values in-place.

I took the liberty to rename your function toBytes to bitSubstring. The former name toBytes seemed a bit off for a method that already takes bytes as an input.

public static byte[] bitSubstring(int byteOffset, int bitOffset,
                                  int lengthInBits, byte... source) {
    return bitSubstring(8 * byteOffset + bitOffset, lengthInBits, source);
}

public static byte[] bitSubstring(int startBit, int lengthInBits,
                                  byte... source) {
    assert startBit >= 0 && startBit < 8 * source.length;
    assert lengthInBits >= 0 && startBit + lengthInBits <= 8 * source.length;

    int lengthInBytes = (int) Math.ceil(lengthInBits / 8.0);
    byte[] target = new byte[lengthInBytes];
    int startByte = startBit / 8;
    int endBitExclusive = startBit + lengthInBits;
    int endByteExclusive = (int) Math.ceil(endBitExclusive / 8.0);
    int sourceBytesToRead = endByteExclusive - startByte;
    int lowerPartSize = 8 * endByteExclusive - endBitExclusive;
    int shiftLowerUp = (8 - lowerPartSize);
    int shiftUpperDown = lowerPartSize;
    int lastSrc = 0;
    if (sourceBytesToRead > lengthInBytes) {
        lastSrc = source[startByte] & 0xFF;
        startByte++;
    }
    for (int targetByte = 0; targetByte < target.length; ++targetByte) {
        int curSrc = source[startByte + targetByte] & 0xFF;
        target[targetByte] |= (lastSrc << shiftLowerUp)
                            | (curSrc >>> shiftUpperDown);
        lastSrc = curSrc;
    }
    int overhang = 8 * lengthInBytes - lengthInBits;
    if (overhang > 0) {
        target[0] &= 0xFF >>> overhang;
    }
    return target;
}

Above algorithm should be fairly fast. However, if you are only interested in implementation size and readability an approach where you copy bit by bit is better.

public static byte[] bitSubstringSlow(int startBitSource, int lengthInBits,
                                      byte... source) {
    byte[] target = new byte[(int) Math.ceil(lengthInBits / 8.0)];
    int startBitTarget = (8 - lengthInBits % 8) % 8;
    for (int i = 0; i < lengthInBits; ++i) {
        setBit(target, startBitTarget + i, getBit(source, startBitSource + i));
    }
    return target;
}

public static int getBit(byte[] source, int bitIdx) {
    return (source[bitIdx / 8] >>> (7 - bitIdx % 8)) & 1;
}

public static void setBit(byte[] target, int bitIdx, int bitValue) {
    int block = bitIdx / 8;
    int shift = 7 - bitIdx % 8;
    target[block] &= ~(1 << shift);
    target[block] |= bitValue << shift;
}

… or less re-usable but even shorter:

public static byte[] bitSubstringSlow2(int startBitSource, int lengthInBits,
                                       byte... source) {
    byte[] target = new byte[(int) Math.ceil(lengthInBits / 8.0)];
    int startBitTarget = (8 - lengthInBits % 8) % 8;
    for (int i = 0; i < lengthInBits; ++i) {
        int srcIdx = startBitSource + i;
        int tgtIdx = startBitTarget + i;
        target[tgtIdx / 8] |= ((source[srcIdx / 8] >>> (7 - srcIdx % 8)) & 1)
                              << (7 - tgtIdx % 8);
    }
    return target;
}

Upvotes: 2

Related Questions