Bruno
Bruno

Reputation: 133

binary file manipulation in python

I need help with my code: I am not able to get the binary file manipulation to work.

code for creating the binary file:

import struct
# Values to store: ints and floats alternate (int, float, int, float, ...).
numeros = [-9 ,20.5, 6, 10.8, 10, 8.0, 45, -99.6 ,12, -54.7];

try:
  with open('value.bin', "wb") as arq:
    # Header: a hard-coded count of 5 (presumably the number of
    # (int, float) pairs in numeros; the reading code loops this many times).
    arq.write(struct.pack("i", 5))
    for num in numeros:
        # Pack each value with the format code matching its Python type.
        if isinstance(num, int):
            arq.write(struct.pack("i", num))
        elif isinstance(num, float):
            arq.write(struct.pack("f", num))
except IOError:
        print("Error opening or handling file.")

code for file manipulation:

try:

    # 'r+b' opens the existing file for both reading and writing in binary mode.
    with open('value.bin', 'r+b') as arq:
      # Header: loop count stored as the first 4-byte int.
      n  = struct.unpack('=i', arq.read(4))[0]
      for i in range(n):
        # NOTE(review): j is assigned but never used below.
        j = i
        # Read one int followed by one float (4 bytes each).
        numInt= struct.unpack('=i', arq.read(4))
        numFloat = struct.unpack('=f', arq.read(4))
        # seek() with its default whence is absolute: this jumps to byte 4
        # (just past the header) on every iteration, not back to the
        # record that was just read.
        arq.seek(4)
        # if/elif: at most one of the two writes happens per iteration.
        if numInt[0] <10:
            arq.write(struct.pack("i", -1))
        elif numFloat[0] >9.0:
            arq.write(struct.pack("f", 9999.0))

except IOError:
        print('Error opening or handling file.')

correct output:

 5 -1 9999.0 -1 9999.0 10 8.0 45 -99.6 12 -54.7

Output from my code:

5  -9 20.5 6 10.80000019073486310 8.0 45 -99.599998474121112 -54.70000076293945

the numbers -9, 20.5, 6, 10.8 are not being changed.

I need this help.

Upvotes: 0

Views: 411

Answers (1)

CristiFati
CristiFati

Reputation: 41167

code.py:

import sys
import struct
import struct
import os


# struct format codes for the two kinds of values stored in the file.
int_format = "i"
float_format = "f"
# Packed size in bytes of each kind, computed from the format codes
# instead of being hard-coded.
int_size = struct.calcsize(int_format)
float_size = struct.calcsize(float_format)


def write_file(file_name):
    """Create the binary test file from the built-in ``numeros`` list.

    The file starts with an int holding half the number of values (the
    count of (int, float) pairs), followed by every value packed with
    the format code matching its Python type. The original numbers are
    printed first. An IOError is reported on stdout, not raised.

    :param file_name: path of the file to create or overwrite.
    """
    numeros = [-9, 20.5, 6, 10.8, 10, 8.0, 45, -99.6, 12, -54.7]
    print("Original numbers:", numeros)
    pair_count = len(numeros) // 2
    try:
        with open(file_name, "wb") as arq:
            arq.write(struct.pack(int_format, pair_count))
            for value in numeros:
                # Choose the format code by type; other types are skipped.
                if isinstance(value, float):
                    fmt = float_format
                elif isinstance(value, int):
                    fmt = int_format
                else:
                    continue
                arq.write(struct.pack(fmt, value))
    except IOError as e:
        print("Error opening or handling file:", e)


def read_file(file_name):
    """Parse the binary file and return its content as a flat list.

    The file is read as a leading int (the number of pairs) followed by
    that many (int, float) pairs.

    :param file_name: path of the file to read.
    :return: ``[count, int, float, int, float, ...]``; if an IOError
        occurs it is printed and whatever was collected so far is returned.
    """
    values = []
    pair_format = int_format + float_format
    pair_size = int_size + float_size
    try:
        with open(file_name, "rb") as arq:
            (count,) = struct.unpack(int_format, arq.read(int_size))
            values.append(count)
            for _ in range(count):
                # The int and the float are read in one go; equivalent to:
                #num_int = struct.unpack(int_format, arq.read(int_size))[0]
                #num_float = struct.unpack(float_format, arq.read(float_size))[0]
                values.extend(struct.unpack(pair_format, arq.read(pair_size)))
    except IOError as e:
        print("Error opening or handling file:", e)
    return values


def modify_file(file_name):
    """Rewrite selected values of the binary file in place.

    For every (int, float) pair after the leading count: an int below 10
    is overwritten with -1 and a float above 9.0 is overwritten with
    9999.0. Both checks are independent, so both values of one pair may
    be replaced. An IOError is reported on stdout, not raised.

    :param file_name: path of the file to update.
    """
    # Pack the replacement values once, before the loop.
    int_replacement = struct.pack(int_format, -1)
    float_replacement = struct.pack(float_format, 9999.0)
    pair_format = int_format + float_format
    pair_size = int_size + float_size
    try:
        with open(file_name, "r+b") as arq:
            (count,) = struct.unpack(int_format, arq.read(int_size))
            for _ in range(count):
                # The int and the float are read in one go; equivalent to:
                #num_int = struct.unpack(int_format, arq.read(int_size))[0]
                #num_float = struct.unpack(float_format, arq.read(float_size))[0]
                num_int, num_float = struct.unpack(pair_format, arq.read(pair_size))
                if num_int < 10:
                    # Step back over the whole pair, overwrite the int, then
                    # skip the float so the position is after the pair again.
                    arq.seek(-pair_size, os.SEEK_CUR)
                    arq.write(int_replacement)
                    arq.seek(float_size, os.SEEK_CUR)
                if num_float > 9.0:
                    # Step back over the float only; the write moves the
                    # position forward to where it was.
                    arq.seek(-float_size, os.SEEK_CUR)
                    arq.write(float_replacement)
    except IOError as e:
        print("Error opening or handling file:", e)


def main():
    """Write the sample file, show it, modify it in place, and show it again."""
    file_name = "value.bin"
    write_file(file_name)
    before = read_file(file_name)
    print("Original file content:", before)
    modify_file(file_name)
    after = read_file(file_name)
    print("Modified file content:", after)


# Script entry point: print the interpreter version and platform, then run main().
if __name__ == "__main__":
    print("Python {:s} on {:s}\n".format(sys.version, sys.platform))
    main()

Notes:

  • Module documentation: [Python]: struct - Interpret bytes as packed binary data
  • Values that are used many times in the code are stored in variables. Example: the int format: int_format = "i"
  • Also used struct.calcsize instead of hard-coding the number of bytes to read from the file for each value (as a coincidence both int and float are 4 bytes, but it's more general this way)
  • Split the program into functions:
    • write_file - generates the file based on the numeros list
    • read_file - reads (and parses) file content and returns it as a list
    • modify_file - reads the file content and replaces some of the values on the way
      • Calculates the new values that are going to replace old ones (e.g. int_replacement once, before the loop)
      • The conditions (e.g. if numInt[0] > 10) were reversed (got corrected in the meantime)
      • By using if / elif when converting the int, if the float coming after it needs to be converted as well, it wouldn't be. Changed to 2 separate if statements
      • The main flaw was how [Python]: seek(offset[, whence]) was called: arq.seek(4) always moves to absolute offset 4, measured from the beginning of the file. This is not correct; seek relative to the current position instead (os.SEEK_CUR):
        • When changing the int: seek back 8 bytes (the float and the int), write 4 bytes (which will move the file pointer 4 bytes forward) and then seek forward 4 more bytes (the float that comes after it) - so at the end the file pointer is in the same position
        • When changing the float: simpler, seek back 4 bytes, and write 4 bytes (which will move the file pointer 4 bytes forward - where it was)
    • main - aggregates the above
  • In read_file and modify_file, the int and the float are read at once, so the 2 commented lines are equivalent to the line below them
  • Some other minor changes, there could be more, but I don't want to overcomplicate the code
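  • The floats read back differ slightly from the original ones because of precision loss when they are packed with the 4-byte "f" format and converted back to Python floats, but if printed with one decimal only, they will match the original ones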

Output:

(py35x64_test) e:\Work\Dev\StackOverflow\q050106975>python code.py
Python 3.5.4 (v3.5.4:3f56838, Aug  8 2017, 02:17:05) [MSC v.1900 64 bit (AMD64)] on win32

Original numbers: [-9, 20.5, 6, 10.8, 10, 8.0, 45, -99.6, 12, -54.7]
Original file content: [5, -9, 20.5, 6, 10.800000190734863, 10, 8.0, 45, -99.5999984741211, 12, -54.70000076293945]
Modified file content: [5, -1, 9999.0, -1, 9999.0, 10, 8.0, 45, -99.5999984741211, 12, -54.70000076293945]

Upvotes: 1

Related Questions