New2Java
New2Java

Reputation: 313

JAVA Code to split CSV file into different CSV files and extracting a single column data from parent file to child files

I have a CSV file which contains almost 10000 lines of data. I want to split that file into 10 different CSV files based on the total line count, so that each file contains 1000 lines of data in order: the first file should have lines 1-1000, the second file lines 1001-2000, and so on. Also, each of those 10 CSV files should contain only the data from the first column of the parent CSV file. The code I developed writes the same data (i.e. lines 1-1000) to all 10 CSV files. I am unable to figure out what the mistake in the code is.

// Question snippet: for each output file j, write the first CSV column of a run of input lines
// to FileNumber_<j>.csv. The closing brace of this outer loop is not part of the snippet.
for (int j=1;j<=files;j++){  

   String inputfile = "C:/Users/Downloads/File.csv";
   // A new reader on the same input file is created on every pass of the outer loop,
   // so each pass starts reading at the first line of the input again.
   // The reader is not closed anywhere in this snippet.
   BufferedReader br = new BufferedReader(new FileReader(inputfile)); 
   FileWriter fstream1 = new FileWriter("C:/Users/Downloads/FileNumber_"+j+".csv");       
   BufferedWriter out = new BufferedWriter(fstream1);  

   String strLine = null; 

   // i is a static int initialised to 0; lines and files are calculated in another part of the code.
   // NOTE(review): the loop exits with i == j*lines + 1 and the initialiser adds 1 again on the
   // next pass, so every pass after the first runs one iteration fewer than `lines`.
   for (i=i+1;i<=(j*lines);i++) {   //I Have declared i as static int i = 0 and have already calculated lines and files in other part of code

    strLine = br.readLine();   
    if (strLine!= null) { 

        // Keep only the first column. NOTE(review): split(",") returns an empty array for a
        // line made only of commas, in which case strar[0] would throw.
        String strar[] = strLine.split(",");
        out.write(strar[0]);   
        // No line separator after the last line intended for this file.
        if(i!=(j*lines)) {  
            out.newLine(); }  
    }
   }


   // Flushes and closes the current output file.
   out.close();   

Upvotes: 0

Views: 13736

Answers (4)

selva kumar
selva kumar

Reputation: 31

Use this code 

import java.io.*;  
import java.util.Scanner;  
/**
 * Splits every regular file found in the {@code file/} directory into a user-supplied
 * number of parts.
 *
 * <p>The number of parts {@code s} is read from standard input. For a source file with
 * {@code count} lines, each part receives {@code count / s} consecutive lines; the last part
 * additionally receives the {@code count % s} remaining lines so that no input line is lost.
 * Part {@code j} of a source file is written to {@code file/CSV_DATA_j/<source name>.csv}.
 * Lines inside a part are separated by the platform line separator, and no separator is
 * written after the last line of a part.
 *
 * <p>Files with fewer than {@code s} lines are reported and left unsplit.
 */
public class csvfilesplit
{
    /** Directory, relative to the working directory, that holds the files to split. */
    private static final String SOURCE_DIR = "file/";

    /**
     * Prompts for the number of parts and splits every regular file in {@code file/}.
     *
     * @param args ignored
     * @throws IOException if a source file cannot be read or a part cannot be written
     */
    public static void main(String[] args) throws IOException {
        // Deliberately not closed: closing the Scanner would also close System.in.
        Scanner reader = new Scanner(System.in);
        System.out.println("\n Enter The count to split each file :-----------");
        int s = reader.nextInt();
        if (s <= 0) {
            // Guards against division by zero and a meaningless negative part count.
            System.out.println("The count to split each file must be greater than zero, got: " + s);
            return;
        }

        File folder = new File(SOURCE_DIR);                             //*** Location of your file
        File[] listOfFiles = folder.listFiles();
        if (listOfFiles == null) {
            // listFiles() returns null when the path is missing or is not a directory.
            System.out.println("Source folder not found or not readable: " + folder.getAbsolutePath());
            return;
        }

        int filecount = 0;
        for (File fo : listOfFiles) {
            if (fo.isFile()) {
                filecount++;
            }
        }
        System.out.println("Total source file count is :-----------------------    "+filecount+"\n");  //*** Total number of original files in the folder

        String path = folder.getAbsolutePath();
        for (File source : listOfFiles) {
            if (!source.isFile()) {
                continue;
            }
            System.out.println("File name Is :--------------------------   " + source.getName());  //*** File name

            int count = countLines(source);
            System.out.println("File total rows count is :--------------   "+count);   //*** Number of rows in the file

            int split = count / s;
            if (split != 0) {
                System.out.println("Each splitted file line count is :------   "+split+"\n"); //*** Rows per part (the last part also gets the remainder)
                splitFile(source, path, s, split);
            } else {
                // The file has fewer rows than the requested number of parts.
                System.out.println("\n******************************* Mentioned this file have below - "+s+" rows   ******************************   "+source.getName()+" \n");
            }
        }
        System.out.println("\n Splitted_CSV_files stored location is :     "+path);
    }

    /** Returns the number of lines in {@code source}, closing the reader when done. */
    private static int countLines(File source) throws IOException {
        int count = 0;
        try (BufferedReader lineCounter = new BufferedReader(new FileReader(source))) {
            while (lineCounter.readLine() != null) {
                count++;
            }
        }
        return count;
    }

    /**
     * Writes {@code parts} consecutive slices of {@code source} into
     * {@code <path>/CSV_DATA_<j>/<source name>.csv}; the last slice runs to end of file.
     */
    private static void splitFile(File source, String path, int parts, int linesPerPart) throws IOException {
        // A single reader is shared by all parts so each part continues where the previous one stopped.
        try (BufferedReader br = new BufferedReader(new FileReader(source))) {
            for (int j = 1; j <= parts; j++) {
                File dir = new File(path+"/"+"CSV_DATA_"+j);
                dir.mkdir();
                //********Destination File Location******
                File target = new File(dir.getAbsolutePath()+"/"+source.getName()+".csv");   //*** Split file name and format (.txt/.csv ...)
                boolean lastPart = (j == parts);
                try (BufferedWriter out = new BufferedWriter(new FileWriter(target))) {
                    int written = 0;
                    String strLine;
                    // The quota check comes first so a full part never consumes an extra line.
                    while ((lastPart || written < linesPerPart) && (strLine = br.readLine()) != null) {
                        if (written > 0) {
                            out.newLine();   // separator between lines, never after the last one
                        }
                        out.write(strLine);
                        written++;
                    }
                }
            }
        }
    }
}

Upvotes: 2

Hasnain Ali Bohra
Hasnain Ali Bohra

Reputation: 2180

Please find the Below code:-

/**
 * Splits the parent CSV file into child files of 1000 lines each, keeping only the first
 * column of every line.
 *
 * <p>The parent file is opened once and read sequentially. Output goes to
 * {@code FileNumber_1.csv}, {@code FileNumber_2.csv}, ... next to the input file; a new file
 * is started after every 1000 lines written. {@code FileNumber_1.csv} is always created,
 * even when the parent file is empty.
 *
 * @param args ignored
 * @throws IOException if the parent file cannot be read or a child file cannot be written
 */
public static void main(String[] args) throws IOException {
    final int linesPerFile = 1000;
    final String outputPrefix = "C:/Users/bohrahas/Desktop/FileNumber_";
    String inputfile = "C:/Users/bohrahas/Desktop/SampleCSVFile.csv";

    // Open the parent file first so that no child file is created when it is missing.
    try (BufferedReader br = new BufferedReader(new FileReader(inputfile))) {
        int file_Number = 1;
        BufferedWriter out = new BufferedWriter(new FileWriter(new File(outputPrefix + file_Number + ".csv")));
        try {
            String line;
            int count = 0;   // lines written so far across all child files
            while ((line = br.readLine()) != null) {
                // After every full batch of 1000 lines, roll over to the next child file.
                if (count > 0 && count % linesPerFile == 0) {
                    // Close (and thereby flush) the finished file before replacing the writer;
                    // otherwise its buffered tail would never reach disk.
                    out.close();
                    file_Number++;
                    out = new BufferedWriter(new FileWriter(new File(outputPrefix + file_Number + ".csv")));
                }
                // Everything before the first comma is the first column.
                int comma = line.indexOf(",");
                if (comma != -1) {
                    line = line.substring(0, comma);
                }
                out.write(line);
                out.newLine();
                count++;
            }
        } finally {
            // Flush and release the last child file even if reading or writing failed.
            out.close();
        }
    }
}

Logic :-

  1. You don't have to re-open the parent file on every loop iteration. Just open it once, i.e. create the reader object once, and then process the parent file.

  2. While reading each line of the parent file, take the whole line and remove every column except the first.

  3. Everything up to the first "," is always the first column, so remove the rest of the line starting at the first ",".
  4. If the number of lines processed so far is an exact multiple of 1000 (1000, 2000, 3000, etc.), create a new file and a new BufferedWriter for it.

Upvotes: 0

PotatoManager
PotatoManager

Reputation: 1775

Your logic is very bad here. I rewrote the whole code for you,

import java.io.*;  
import java.util.Scanner;  


/**
 * Splits {@code file.csv} (in the working directory) into numbered files
 * {@code FileNumber_1.csv}, {@code FileNumber_2.csv}, ... that contain only the first
 * column of each input line.
 */
public class FileSplit {

    /** Input file, resolved against the working directory. */
    private static final String INPUT_FILE = "file.csv";

    /**
     * Writes the first column of the input file into {@code files} output files of at most
     * {@code lines} lines each, in input order.
     *
     * <p>The input is opened once, so each output file continues where the previous one
     * stopped. Every written line is terminated with the platform line separator. If the
     * input runs out early, the remaining output files are created empty.
     *
     * @param lines maximum number of lines per output file
     * @param files number of output files to create
     * @throws FileNotFoundException if {@code file.csv} does not exist
     * @throws IOException if reading or writing fails
     */
    public static void myFunction(int lines, int files) throws FileNotFoundException, IOException {
        // try-with-resources closes the reader and each writer even when an exception is thrown.
        try (BufferedReader br = new BufferedReader(new FileReader(INPUT_FILE))) {
            for (int i = 1; i <= files; i++) {
                try (BufferedWriter out = new BufferedWriter(new FileWriter("FileNumber_" + i + ".csv"))) {
                    for (int j = 0; j < lines; j++) {
                        String strLine = br.readLine();
                        if (strLine == null) {
                            break;   // input exhausted; later reads would return null as well
                        }
                        out.write(firstColumn(strLine));
                        out.newLine();
                    }
                }
            }
        }
    }

    /**
     * Returns the text before the first comma, or the whole line when there is none.
     * Unlike {@code split(",")[0]}, this also works for lines made only of commas, for which
     * {@code split} returns an empty array.
     */
    private static String firstColumn(String line) {
        int comma = line.indexOf(',');
        return comma < 0 ? line : line.substring(0, comma);
    }

    /** Counts the lines of {@code file}, closing the scanner when done. */
    private static int countLines(File file) throws FileNotFoundException {
        int count = 0;
        try (Scanner scanner = new Scanner(file)) {
            while (scanner.hasNextLine()) {
                scanner.nextLine();
                count++;
            }
        }
        return count;
    }

    /**
     * Counts the lines of {@code file.csv}, derives how many output files are needed for the
     * configured lines-per-file value, prints both numbers, and performs the split.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        try {
            int lines = 2;  // set this to whatever number of lines you need in each file
            int count = countLines(new File(INPUT_FILE));
            System.out.println(count);

            // Round up so a trailing partial batch still gets its own file.
            int files = count / lines;
            if (count % lines != 0) {
                files++;
            }
            System.out.println(files); // number of files that will be created

            myFunction(lines, files);
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so a single handler covers both.
            e.printStackTrace();
        }
    }

}

Upvotes: -2

JVOneLife
JVOneLife

Reputation: 74

The problem of having same lines in each of 10 csv files is because of the line below in method myFunction

BufferedReader br = new BufferedReader(new FileReader(inputfile));

The logic using the variables i, j and lines works fine. But every time myFunction is called, br (the BufferedReader for the input file) is initialized again.

So br.readLine() starts reading from the beginning of the file again, which is why each of the 10 CSV files ends up with the same 1000 lines.

Hope it helps!

Upvotes: 1

Related Questions