akshay acharya
akshay acharya

Reputation: 163

csv file uploaded to s3 using boto3 is empty in s3

I have two csv files that i am uploading from an ec2 instance to the s3 bucket along with a few other files. All the other files are being uploaded just fine but my csv files, though it is uploaded, there seems ot be no data inside it even though the local copy of the file on the instance is showing the data. im not sure why its saying 0 bytes on the bucket.

the csv file is part of another larger program. here is the code.

from boto3.session import Session
import botocore
import boto3
import zipfile
import darknet
import os
import cv2
import glob
import csv
import numpy as np
global lat_start, lon_start
import shutil


#HELPER FUNCTION DEFINITIONS

ACCESS_KEY = '*********'
SECRET_KEY = '******D'

def image_detection(image_path, network, class_names, class_colors, thresh):
    # Darknet doesn't accept numpy images.
    # Create one with image we reuse for each detect
    width = darknet.network_width(network)
    height = darknet.network_height(network)
    darknet_image = darknet.make_image(width, height, 3)

    image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_resized = cv2.resize(image_rgb, (width, height),interpolation=cv2.INTER_LINEAR)

    darknet.copy_image_from_bytes(darknet_image, image_resized.tobytes())
    detections = darknet.detect_image(network, class_names, darknet_image, thresh=thresh)
    darknet.free_image(darknet_image)
    image = darknet.draw_boxes(detections, image_resized, class_colors)
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB), detections
    
def discretize_line(lat_start, lon_start, d_element, d, bearing):

    # d_element -> how many element we need in a line secment

    # global lat_start, lon_start

    R = 6371.0*1000.0
    # -1 because in case of 10 elements/points we also want len(lat_array) the same
    dstep = d/(d_element-1) #0.6524896365354135 #2.0 # meters
    dist_list = np.ones(int(d/dstep))*dstep
    # print(dist_list)
    brg = np.radians(bearing)
    # if d%dstep != 0:
    #   dist_list = np.append(dist_list, d%dstep)

    # This will append lat and lon into array which contains
    # small segments of distance

    lat_array = np.array([np.radians(lat_start)]) # rads
    lon_array = np.array([np.radians(lon_start)]) # rads
    # lat_array = np.array([])
    # lon_array = np.array([])

    for i, dist in enumerate(dist_list):

        ## last element make the waypoint shifted, so we break it
        if i >= (d_element):
            break

        lat1 = lat_array[i]
        lon1 = lon_array[i]
        # print(dist)
        Ad = dist/R
        lat2 = np.arcsin(np.sin(lat1)*np.cos(Ad) + np.cos(lat1)*np.sin(Ad)*np.cos(brg))
        lon2 = lon1 + np.arctan2( (np.sin(brg)*np.sin(Ad)*np.cos(lat1)) , (np.cos(Ad) - np.sin(lat1)*np.sin(lat2)))
        lat_array = np.append(lat_array, lat2)
        lon_array = np.append(lon_array, lon2)



        # print(i)

    return lat_array, lon_array
    
    
def get_distance_bearing(lat1, lon1, lat2, lon2):

    # global lat_start, lon_start

    R = 6371.0*1000.0
    lat_start = np.radians(lat1)
    lon_start = np.radians(lon1)
    lat_end = np.radians(lat2)
    lon_end = np.radians(lon2)
    dLat = lat_end - lat_start
    dLon = lon_end - lon_start

    a = np.sin(dLat/2.0)*np.sin(dLat/2.0) + np.cos(lat_start)*np.cos(lat_end)*np.sin(dLon/2.0)*np.sin(dLon/2.0)
    c = 2.0*np.arctan2(np.sqrt(a),np.sqrt(1-a))

    d = c*R

    y = np.sin(dLon)*np.cos(lat_end)
    x = np.cos(lat_start)*np.sin(lat_end) - np.sin(lat_start)*np.cos(lat_end)*np.cos(dLon)
    bearing = np.degrees(np.arctan2(y,x))

    return d, bearing
    
    
def upload_to_aws(local_file, bucket, s3_file):
    s3 = boto3.client('s3', aws_access_key_id=ACCESS_KEY,
                      aws_secret_access_key=SECRET_KEY)

    try:
        s3.upload_file(local_file, bucket, s3_file)
        print("Upload Successful")
        return True
    except FileNotFoundError:
        print("The file was not found")
        return False
    except NoCredentialsError:
        print("Credentials not available")
        return False
    


##END OF FUNCTION DEFINITIONS ##



#Unzip the zip file and its contents
print("unzipping")
path_to_zip_file = "/home/ubuntu/pano/Zip/Videos.zip"
with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
    zip_ref.extractall("/home/ubuntu/pano/Video")
    print("Finished Unzipping")
#End of Unzip


# CSV  open and declaration##
data_file_path = "/home/ubuntu/pano/stack/quantity.csv"
data_file = open(data_file_path, "w+")
dataCSVWriter = csv.writer(data_file, delimiter=',',quotechar='|', quoting=csv.QUOTE_MINIMAL)
dataCSVWriter.writerow(['lat', 'lon', 'Quantity'])

#CSV for lane thumbnail
thumbnail_data_file_path = "/home/ubuntu/pano/stack/lane_thumbnail.csv"
thumbnail_data_file = open(thumbnail_data_file_path, "w+")
thumbnail_dataCSVWriter = csv.writer(thumbnail_data_file, delimiter=',',quotechar='|', quoting=csv.QUOTE_MINIMAL)
thumbnail_dataCSVWriter.writerow(['lat', 'lon'])



#Define start and end point lists
#start_point_list = [(35.841454251754755,  139.52427014959153),(35.84147944801779, 139.52420150963678)]
start_point_list = [(36.12083710338884, 139.21630320454503),(36.12080527337101, 139.2164926108044)]
#end_point_list = [(35.84151350159559, 139.52424466860762),(35.84144222040454, 139.52422739581436)]
end_point_list = [(36.12083735438514, 139.2164757318577),(36.12081575161991, 139.21630345327617)]
wp_lat_array = np.array([])
wp_lon_array = np.array([])





##Split th eline into points and it is stored in lat array lon array
"""for i in range(len(start_point_list)):
    ## input two points and find a slicing waypoint between it
    distance, bearing_deg = get_distance_bearing(start_point_list[i][0], start_point_list[i][1], end_point_list[i][0], end_point_list[i][1])
    print(distance)


    lat_array, lon_array = discretize_line(start_point_list[i][0], start_point_list[i][1], float(d_element[i]), distance, bearing_deg)"""

#Initialize the detector variables and paths
quantity_bottles_frame = []
config_file = "/home/ubuntu/darknet_bottle_example/yolov4_bottle_can.cfg"
data_file = "/home/ubuntu/darknet_bottle_example/obj_bottle_can.data"
weights = "/home/ubuntu/darknet_bottle_example/yolov4_bottle_can_best.weights"

network, class_names, class_colors = darknet.load_network(
        config_file,
        data_file,
        weights,
        batch_size=1
    )

image_dir = "/home/ubuntu/pano/Frames"

#1.Split into frames
path = "/home/ubuntu/pano/Video/Panorama/Videos"
j = 0


"""Order of events
1. Split into frames
2. Rotate images if needed
3. Running through detctor
4. Calculate count and draw bounding boxes
5. Store these images in respective directoies
6. Take start point of lane and end point and split into many coordinates in between based on number of frames
7. Write to csv file
8. Stack the images per lane
9. Empty the Frames folder after every lane
10. Upload stacked images and csv to cloud """
# Parameter to change is fps in the ffmpeg command. Change accoprding to need based on reference
for filename in os.listdir(path):
    if (filename.endswith(".mp4")): #or .avi, .mpeg, whatever.
        j += 1
        path1 = path + filename
        print(path1)
        os.system("ffmpeg -i /home/ubuntu/pano/Video/Panorama/Videos/{0} -vf fps=0.07 /home/ubuntu/pano/Frames/{1}-%3d.jpg".format(filename,j))
        
        #2. Rotate images if needed
        frames_path = "/home/ubuntu/pano/Frames/*.jpg"
        list_images = glob.glob(frames_path)
        list_sorted = sorted(list_images)
        #for image in list_sorted:
            #read the image
         #   temp = cv2.imread(image)
         #   image1 = cv2.rotate(temp, cv2.ROTATE_90_COUNTERCLOCKWISE)
         #  cv2.imwrite("{0}".format(image), image1)
         
        ## according to how many partial panorama we have in each lane    
        d_element =[len(list_images)]
            
        print(f"Now detecting objects in lane {j}")
        
        #3. Running through detctor
        frame_number = 1
        for image in sorted(os.listdir(image_dir)):
         
            
            #Path to the input images for the detector i.e Frames
            quantity_frame = 0
            image_name = f"{image}"
            ext = '.jpg'
            input_image_name = image_name 
            image_path = os.path.join(image_dir, input_image_name)
            print(image_path)

            #Path to output images to be stored after running through detector
            output_dir = f"/home/ubuntu/pano/lane{j}"
            output_name = "yolo_" + image_name 
            output_path = os.path.join(output_dir, output_name)

            # image = load_images(image_path)
            dn_frame_width = 416
            dn_frame_height = 416

            frame = cv2.imread(image_path)

            frame_width = frame.shape[1]
            frame_height = frame.shape[0]
            
            #### Passing the image to darknet
            image, detections = image_detection(image_path, network, class_names, class_colors, thresh=0.05)
            
            #cv2.imwrite(f'/home/ubuntu/temp/Inference{frame_number}.jpg', image)
            #cv2.imwrite(f'/home/ubuntu/temp/orignal_detect{frame_number}.jpg', frame)
            
            ###Based on the detections, running them through a loop to draw bounding box and also incrememnt count of object in the frame
            #4. Calculate count and draw bounding boxes
            for i in range(len(detections)):
                xc_percent = detections[i][2][0]/dn_frame_width
                yc_percent = detections[i][2][1]/dn_frame_height 
                w_percent = detections[i][2][2]/dn_frame_width
                h_percent = detections[i][2][3]/dn_frame_height

                xc = xc_percent*frame_width
                yc = yc_percent*frame_height
                w = w_percent*frame_width
                h = h_percent*frame_height

                xmin = xc - w/2.0
                ymin = yc - h/2.0
                xmax = xc + w/2.0
                ymax = yc + h/2.0
                
                
                #If object is detected, increase the count of the object in the frame
                if detections[i][0] == "bottle":
                    cv2.rectangle(frame, (int(xmin),int(ymin)),(int(xmax),int(ymax)),(0,0,255),2)
                    cv2.putText(frame, "bottle", (int(xmin), int(ymin-10)), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,0,255), 2)
                    quantity_frame += 1
                
                elif detections[i][0] == "can":
                    cv2.rectangle(frame, (int(xmin),int(ymin)),(int(xmax),int(ymax)),(255,0,0),2)
                    cv2.putText(frame, "can", (int(xmin), int(ymin-10)), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255,0,0), 2)
                else:
                    print(f"{image} has no objects ")
            
                
                print(f"Quantity in frame {frame_number} = {quantity_frame}")
            #5. Store these images in respective directoies
            cv2.imwrite(output_path, frame)
            quantity_bottles_frame.append(quantity_frame)
            frame_number += 1   


        ###Split the points into equidistant points between start point and end point
        ##6. Take start point of lane and end point and split into many coordinates in between based on number of frames    
        distance, bearing_deg = get_distance_bearing(start_point_list[j-1][0], start_point_list[j-1][1], end_point_list[j-1][0], end_point_list[j-1][1])
        print(distance)
        lat_array, lon_array = discretize_line(start_point_list[j-1][0], start_point_list[j-1][1], float(d_element[0]), distance, bearing_deg)

        lat_csv = []
        lon_csv = []  
        
        ##Convery those points into degrees
        
        for lat,lon in zip(lat_array, lon_array):
            lat_degrees = "{:}".format(np.degrees(lat))
            lon_degrees = "{:}".format(np.degrees(lon))
            lat_csv.append(lat_degrees)
            lon_csv.append(lon_degrees)  
            #lat_csv = "{:}".format(np.degrees(lat))
            #lon_csv = "{:}".format(np.degrees(lon))
            
        ##7.Write each row in the csv file
        for k in range(d_element[0]):
            dataCSVWriter.writerow([lat_csv[k], lon_csv[k], quantity_bottles_frame[k]])
            #if k != d_element[0]-1:
            #    dataCSVWriter.writerow([lat_csv[k], lon_csv[k], quantity_bottles_frame[k], "-", "-" ])
            if k ==d_element[0]-1:
                print(lat_csv[int(d_element[0]/2)])
                thumbnail_dataCSVWriter.writerow([ lat_csv[int(d_element[0]/2)],lon_csv[int(d_element[0]/2)]])
            
        #####8.STACKING THE IMAGES ######
                
        images = []   
        stacking_input = f"/home/ubuntu/pano/lane{j}/*.jpg"
        list_images = glob.glob(stacking_input)
        #print(list_images)
        stacking_input_reverse = sorted(list_images, reverse = True)
        print(stacking_input_reverse)
        for image in stacking_input_reverse:
            img = cv2.imread(image)
            images.append(img)
        final_image = cv2.hconcat(images)
        image_name = f"cloud_lane{j}_stack.jpg"
        stacking_output = f"/home/ubuntu/pano/stack"
        output_path = os.path.join(stacking_output, image_name)
        cv2.imwrite(output_path, final_image)
        
        
        ##### 9. DELETE FRAMES AFTER ONE ITERATION OF LOOP #####
        
        for f in os.listdir(image_dir):
            del_path = "/home/ubuntu/pano/Frames/" + f
            os.remove(del_path)
        
        
    else:
        continue

#Close csv file 
#data_file.close()
#thumbnail_data_file.close()

### 10. Upload to s3 bucket  ####
    
stack_path = "/home/ubuntu/pano/stack"    
for file in sorted(os.listdir(stack_path)):
    print(f"Uploading {file}")
    uploaded = upload_to_aws(f'/home/ubuntu/pano/stack/{file}', 'fbt-pano-test', f'{file}')
    

Do i need to close the csv file in any way? Or does s3 not support csv upload through boto3?

Upvotes: 0

Views: 1257

Answers (1)

akshay acharya
akshay acharya

Reputation: 163

I found it. Turns out, the csv files werent closed at the end. So i moved the upload to s3 part to another program. now python closes the csv files at the end of this program automatically. and so when the upload program runs next, it gets uploaded properly.

Upvotes: 2

Related Questions