Tuesday, December 11, 2012

Split one large file into multiple smaller files based on a line-count break length (large file handling).

Inputs:

fileName: Absolute path of the file to be split.
lineBreak: Number of lines after which a new output file is started.
outFileNamePrefix: Absolute path prefix for the output files; each output file is named <outFileNamePrefix>_<n>.txt, with n starting at 0.

Code:


// Splits the UTF-8 text file named by "fileName" into consecutive chunk files
// of "lineBreak" lines each. Chunks are named <outFileNamePrefix>_<n>.txt with
// n starting at 0; the last chunk may hold fewer lines.
//
// Pipeline inputs : fileName, lineBreak, outFileNamePrefix (all strings)
// Pipeline output : result - "Success", or the toString() of the failure.
IDataCursor pipelineCursor = pipeline.getCursor();
  String fileName = IDataUtil.getString( pipelineCursor, "fileName" );
  String lineBreak = IDataUtil.getString( pipelineCursor, "lineBreak" );
  String outFileNamePrefix = IDataUtil.getString( pipelineCursor, "outFileNamePrefix" );

  BufferedReader reader = null;
  BufferedWriter writer = null;
  // Stays null only if an unexpected unchecked exception escapes; in that
  // case no "result" is published and the exception propagates.
  String result = null;

  try {
   if (fileName == null) {
    throw new IllegalArgumentException("fileName is required");
   }
   // NumberFormatException (null / non-numeric input) is an
   // IllegalArgumentException and is reported through "result" below.
   int linesPerFile = Integer.parseInt(lineBreak);
   if (linesPerFile <= 0) {
    // A non-positive break length can never fill a chunk.
    throw new IllegalArgumentException("lineBreak must be a positive integer: " + lineBreak);
   }

   reader = new BufferedReader(
     new InputStreamReader(new FileInputStream(new File(fileName)), "UTF-8"));

   int fileIndex = 0;
   int linesInCurrentFile = 0;
   String line;
   while ((line = reader.readLine()) != null) {
    if (writer == null) {
     // Open the chunk lazily so an empty input (or an input whose length is
     // an exact multiple of linesPerFile) never leaves an empty chunk file.
     String outFile = outFileNamePrefix + "_" + fileIndex + ".txt";
     // Written as UTF-8 to match the reader. Append mode is kept from the
     // original implementation: an existing chunk file is extended, not
     // overwritten.
     writer = new BufferedWriter(
       new java.io.OutputStreamWriter(new java.io.FileOutputStream(outFile, true), "UTF-8"));
    }

    writer.write(line);
    writer.newLine();
    linesInCurrentFile++;

    if (linesInCurrentFile == linesPerFile) {
     // Chunk is full: roll over AFTER writing, so no line is skipped.
     writer.close();
     writer = null;
     fileIndex++;
     linesInCurrentFile = 0;
    }
   }

   // Close the final (partial) chunk here so a flush failure is reported
   // instead of being swallowed by the cleanup below.
   if (writer != null) {
    writer.close();
    writer = null;
   }
   result = "Success";
  } catch (IllegalArgumentException e) {
   result = e.toString();
  } catch (IOException e) {
   // Also covers FileNotFoundException and UnsupportedEncodingException.
   result = e.toString();
  } finally {
   // Best-effort cleanup on the failure path; the primary error is already
   // captured in "result".
   if (writer != null) {
    try {
     writer.close();
    } catch (IOException ignored) {
     // nothing further to report
    }
   }
   if (reader != null) {
    try {
     reader.close();
    } catch (IOException ignored) {
     // nothing further to report
    }
   }
   // Publish the result BEFORE destroying the cursor; the cursor must not be
   // used once destroy() has been called.
   if (result != null) {
    IDataUtil.put( pipelineCursor, "result", result );
   }
   pipelineCursor.destroy();
  }

No comments:

Post a Comment