Java – 读取文件并拆分成多个文件

我有一个文件,我想用Java读取它,并将其拆分为n个(n由用户输入)输出文件。 以下是我读取文件的方式:

 // Number of output files requested by the user; the read loop below does not use it yet.
 int n = 4;
 BufferedReader reader = new BufferedReader(new FileReader("file.csv"));
 try {
     // Walk the file one line at a time so only the current line is held in memory.
     for (String current = reader.readLine(); current != null; current = reader.readLine()) {
         // Nothing is done with the line yet; this is where per-line work would go.
     }
 } finally {
     // Release the file handle whether or not a read failed.
     reader.close();
 }

如何将文件 file.csv 拆分为 n 个文件?

注 – 由于文件中的条目数量大约为100k,因此无法将文件内容存储到数组中,然后将其拆分并保存到多个文件中。

由于文件可能非常大,因此拆分文件本身也可能很大:

例:

源文件大小:5GB

拆分数量(Num Splits):5

目标文件大小:每个1GB(5个文件)

即使我们有这么大的内存,也不应该一次性读取这么大的拆分块。 基本上,对于每个拆分文件,我们可以每次读取一个固定大小的字节数组(byte-array),这样在性能和内存方面都是可行的。

NumSplits:10 MaxReadBytes:8KB

 public static void main(String[] args) throws Exception { RandomAccessFile raf = new RandomAccessFile("test.csv", "r"); long numSplits = 10; //from user input, extract it from args long sourceSize = raf.length(); long bytesPerSplit = sourceSize/numSplits ; long remainingBytes = sourceSize % numSplits; int maxReadBufferSize = 8 * 1024; //8KB for(int destIx=1; destIx <= numSplits; destIx++) { BufferedOutputStream bw = new BufferedOutputStream(new FileOutputStream("split."+destIx)); if(bytesPerSplit > maxReadBufferSize) { long numReads = bytesPerSplit/maxReadBufferSize; long numRemainingRead = bytesPerSplit % maxReadBufferSize; for(int i=0; i 0) { readWrite(raf, bw, numRemainingRead); } }else { readWrite(raf, bw, bytesPerSplit); } bw.close(); } if(remainingBytes > 0) { BufferedOutputStream bw = new BufferedOutputStream(new FileOutputStream("split."+(numSplits+1))); readWrite(raf, bw, remainingBytes); bw.close(); } raf.close(); } static void readWrite(RandomAccessFile raf, BufferedOutputStream bw, long numBytes) throws IOException { byte[] buf = new byte[(int) numBytes]; int val = raf.read(buf); if(val != -1) { bw.write(buf); } } 
 import java.io.*;
 import java.util.Scanner;

 /**
  * Splits a text file into consecutive files holding a fixed number of lines each.
  * The input is read twice: once to count lines (so the number of output files can be
  * reported up front) and once to copy the lines out.
  */
 public class split {
     /**
      * Splits {@code C:/test.txt} into {@code C:/New Folder/File<j>.txt} files of
      * {@code nol} lines each; the last file holds whatever lines remain.
      * Any failure is reported on standard error rather than thrown.
      *
      * @param args unused
      */
     public static void main(String args[]) {
         try {
             // Reading file and getting no. of files to be generated
             String inputfile = "C:/test.txt"; // Source File Name.
             int nol = 2000; // No. of lines to be split and saved in each output file.

             int count = 0;
             // try-with-resources so the counting pass releases its file handle.
             try (Scanner scanner = new Scanner(new File(inputfile))) {
                 while (scanner.hasNextLine()) {
                     scanner.nextLine();
                     count++;
                 }
             }
             System.out.println("Lines in the file: " + count); // Displays no. of lines in the input file.

             // Integer ceiling division: a partial last chunk still needs its own file.
             int nof = (count + nol - 1) / nol;
             System.out.println("No. of files to be generated :" + nof); // Displays no. of files to be generated.

             // Actual splitting of file into smaller files
             try (BufferedReader br = new BufferedReader(new FileReader(inputfile))) {
                 for (int j = 1; j <= nof; j++) {
                     // Destination File Location
                     try (BufferedWriter out = new BufferedWriter(new FileWriter("C:/New Folder/File" + j + ".txt"))) {
                         for (int i = 1; i <= nol; i++) {
                             String strLine = br.readLine();
                             if (strLine == null) {
                                 break; // input exhausted inside the last chunk
                             }
                             // Separator goes before every line but the first, so no file
                             // ends with a trailing newline, including the last partial one.
                             if (i > 1) {
                                 out.newLine();
                             }
                             out.write(strLine);
                         }
                     }
                 }
             }
         } catch (Exception e) {
             System.err.println("Error: " + e.getMessage());
         }
     }
 }

虽然它是一个老问题,但作为参考我列出了我用来将大文件分割成任何大小的代码,它适用于1.4以上的任何Java版本。

示例拆分和连接块如下所示:

 public void join(String FilePath) { long leninfile = 0, leng = 0; int count = 1, data = 0; try { File filename = new File(FilePath); //RandomAccessFile outfile = new RandomAccessFile(filename,"rw"); OutputStream outfile = new BufferedOutputStream(new FileOutputStream(filename)); while (true) { filename = new File(FilePath + count + ".sp"); if (filename.exists()) { //RandomAccessFile infile = new RandomAccessFile(filename,"r"); InputStream infile = new BufferedInputStream(new FileInputStream(filename)); data = infile.read(); while (data != -1) { outfile.write(data); data = infile.read(); } leng++; infile.close(); count++; } else { break; } } outfile.close(); } catch (Exception e) { e.printStackTrace(); } } public void split(String FilePath, long splitlen) { long leninfile = 0, leng = 0; int count = 1, data; try { File filename = new File(FilePath); //RandomAccessFile infile = new RandomAccessFile(filename, "r"); InputStream infile = new BufferedInputStream(new FileInputStream(filename)); data = infile.read(); while (data != -1) { filename = new File(FilePath + count + ".sp"); //RandomAccessFile outfile = new RandomAccessFile(filename, "rw"); OutputStream outfile = new BufferedOutputStream(new FileOutputStream(filename)); while (data != -1 && leng < splitlen) { outfile.write(data); leng++; data = infile.read(); } leninfile += leng; leng = 0; outfile.close(); count++; } } catch (Exception e) { e.printStackTrace(); } } 

完整的Java代码可在“File Split in Java Program”链接中找到。

用一个计数器来统计条目数。 假设每行是一个条目。

step1:最初创建新的子文件,设置counter = 0;

step2:当你从源文件读取每个条目到缓冲区时递增计数器

step3:当计数器达到要在每个子文件中写入的条目数限制时,将缓冲区的内容刷新到子文件。 关闭子文件

step4:跳转到step1,直到源文件中的数据被读取为止

没有必要遍历文件两次。 您可以用源文件大小除以所需的块数来估计每个块的大小。 然后,一旦某个块的大小超过估计值,就停止向该块写入数据。

这是一个适合我的,我用它来分割10GB文件。 它还允许您添加页眉和页脚。 在拆分基于文档的格式(如XML和JSON)时非常有用,因为您需要在新的拆分文件中添加文档包装器。

 import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.File;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.nio.file.StandardOpenOption;

 /**
  * Splits a large text file into numbered files with a bounded number of lines each,
  * optionally wrapping every output file in a header line and a footer line.
  */
 public class FileSpliter {

     public static void main(String[] args) throws IOException {
         splitTextFiles("D:\\xref.csx", 750000, "", "", null);
     }

     /**
      * Splits {@code fileName} into files of at most {@code maxRows} lines each.
      *
      * <p>Output files are named {@code <base>_NN<ext>} (NN starting at 01) and every file has
      * the same layout: optional header line, the data lines separated by the platform line
      * separator, optional footer line, and no trailing line separator. Existing output files
      * are truncated before writing. A file is only created when there is at least one data
      * line for it, so an empty input produces no output files.
      *
      * @param fileName  path of the file to split
      * @param maxRows   maximum data lines per output file; values below 1 are treated as 1
      * @param header    line written first in every output file; skipped when null or empty
      * @param footer    line written last in every output file; skipped when null or empty
      * @param targetDir directory for the output files; when null, a sibling directory of the
      *                  input named {@code <base>_split} is used
      * @throws IOException if the input cannot be read or an output cannot be written
      */
     public static void splitTextFiles(String fileName, int maxRows, String header, String footer, String targetDir) throws IOException {
         File bigFile = new File(fileName);

         // Derive base name and extension from the file name only, so a dot in a directory
         // name is not mistaken for the extension and a missing dot does not throw.
         String baseName = bigFile.getName();
         int dot = baseName.lastIndexOf('.');
         String ext = dot >= 0 ? baseName.substring(dot) : "";
         String fileNoExt = dot >= 0 ? baseName.substring(0, dot) : baseName;

         // resolveSibling copes with an input that has no parent component.
         Path newDir = targetDir != null
                 ? Paths.get(targetDir)
                 : Paths.get(fileName).resolveSibling(fileNoExt + "_split");
         Files.createDirectories(newDir);

         boolean hasHeader = header != null && header.length() > 0;
         boolean hasFooter = footer != null && footer.length() > 0;
         int rowsPerFile = Math.max(1, maxRows);
         int fileCount = 0;

         try (BufferedReader reader = Files.newBufferedReader(Paths.get(fileName))) {
             String line = reader.readLine();
             // An output file is opened only while a line is pending, so no empty
             // trailing file appears when the line count is a multiple of maxRows.
             while (line != null) {
                 fileCount++;
                 Path splitFile = newDir.resolve(fileNoExt + "_" + String.format("%02d", fileCount) + ext);
                 System.out.print("new file created '" + splitFile.toString());

                 int written = 0;
                 // TRUNCATE_EXISTING: without it, a longer pre-existing file keeps its old tail.
                 try (BufferedWriter writer = Files.newBufferedWriter(splitFile,
                         StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
                     if (hasHeader) {
                         writer.append(header);
                         writer.newLine();
                     }
                     while (line != null && written < rowsPerFile) {
                         if (written > 0) {
                             writer.newLine();
                         }
                         writer.append(line);
                         written++;
                         line = reader.readLine();
                     }
                     if (hasFooter) {
                         writer.newLine();
                         writer.append(footer);
                     }
                 }
                 System.out.println(", " + written + " lines written to file");
             }
         }

         System.out.println("file '" + bigFile.getName() + "' split into " + fileCount + " files");
     }
 }

下面的代码用于将大文件拆分为若干行数较少的小文件。

  // Splits the text file at inputFilePath into files of at most linesPerSplit lines each,
  // written to outputFolderPath as <inputName>_<count>.split. Failures are printed, not thrown.
  long linesWritten = 0;
  int count = 1;
  try {
      // A non-positive limit would otherwise create empty files forever.
      if (linesPerSplit <= 0) {
          throw new IllegalArgumentException("linesPerSplit must be positive: " + linesPerSplit);
      }
      File inputFile = new File(inputFilePath);
      String fileName = inputFile.getName();
      // File.separator instead of a hard-coded backslash so this also works off Windows.
      String outfileName = outputFolderPath + File.separator + fileName;

      // try-with-resources closes the reader and each writer even when a read or write fails.
      try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(inputFile)))) {
          String line = reader.readLine();
          while (line != null) {
              File outFile = new File(outfileName + "_" + count + ".split");
              try (Writer writer = new OutputStreamWriter(new FileOutputStream(outFile))) {
                  while (line != null && linesWritten < linesPerSplit) {
                      writer.write(line);
                      // readLine() strips the terminator; restore it so lines stay separate.
                      writer.write(System.lineSeparator());
                      line = reader.readLine();
                      linesWritten++;
                  }
              }
              linesWritten = 0; // next file
              count++; // next file count
          }
      }
  } catch (Exception e) {
      e.printStackTrace();
  }