Skip to content

读文件之分片读

quhongwei edited this page Mar 28, 2018 · 1 revision

对于大文件,一般先对文件进行分片,再利用集群中的每台机器分别读取部分数据

一:文件内容

100|300.03
seq_0|inst_seq_0|2013-11-09 12:34:56|20131109|20131112 12:23:34|23.33|10.22|22|12345|true|demo
seq_2|inst_seq_1|2013-11-10 15:56:12|20131110|20131113 12:33:34|23.34|11.88|33|56789|false|
seq_3|inst_seq_0|2013-11-09 12:34:56|20131109|20131112 12:23:34|23.33|10.22|22|12345|true|demo
seq_4|inst_seq_1|2013-11-10 15:56:12|20131110|20131113 12:33:34|23.34|11.88|33|56789|false|
seq_5|inst_seq_0|2013-11-09 12:34:56|20131109|20131112 12:23:34|23.33|10.22|22|12345|true|demo
seq_6|inst_seq_1|2013-11-10 15:56:12|20131110|20131113 12:33:34|23.34|11.88|33|56789|false|
seq_7|inst_seq_0|2013-11-09 12:34:56|20131109|20131112 12:23:34|23.33|10.22|22|12345|true|demo
seq_8|inst_seq_1|2013-11-10 15:56:12|20131110|20131113 12:33:34|23.34|11.88|33|56789|false|
seq_9|inst_seq_0|2013-11-09 12:34:56|20131109|20131112 12:23:34|23.33|10.22|22|12345|true|demo
seq_10|inst_seq_1|2013-11-10 15:56:12|20131110|20131113 12:33:34|23.34|11.88|33|56789|false|
OFDCFEND|20131109|100

二:数据定义模板

{
 	"head":[
	     "totalCount|总笔数|Required|Long",
	     "totalAmount|总金额|BigDecimal|Required"
 	],
 	"body":[
	     "seq|流水号",
	     "instSeq|基金公司订单号|Required",
	     "gmtApply|订单申请时间|Date:yyyy-MM-dd HH:mm:ss",
	     "date|普通日期|Date:yyyyMMdd",
	     "dateTime|普通日期时间|Date:yyyyMMdd HH:mm:ss",
	     "applyNumber|普通数字|BigDecimal",
	     "amount|金额|BigDecimal",
	     "age|年龄|Integer",
	     "longN|长整型|Long",
	     "bol|布尔值|Boolean",
	     "memo|备注"
    ],
    "tail": [
    	"fileEnd|数据文件尾部字符",
    	"date|普通日期|Date:yyyyMMdd",
    	"amount|金额|BigDecimal"
    ],
    "protocol":"SP"
}

三:协议布局模板

SP组件内置协议

四:代码示例

  1. 文件结构分割
// Example: split a file into its head, body and tail slices and read each
// slice with its own reader.

// Resolve the sample data file from the classpath and pair it with its template.
String filePath = File.class.getResource("/reader/sp/data/data_split.txt").getPath();
FileConfig config = new FileConfig(filePath, "/reader/sp/template/template3.json", new StorageConfig("nas"));
// Create the file splitter for the configured storage.
FileSplitter splitter = FileFactory.createSplitter(config.getStorageConfig());

// Get the head slice.
FileSlice headSlice = splitter.getHeadSlice(config);
// Read the head slice. setPartial is applied to a clone because `config`
// itself is reused below to compute the body and tail slices.
FileConfig headConfig = config.clone();
// Pass the slice's own data type, exactly as the body and tail sections do
// (the earlier version passed headConfig.getFileDataType() here).
headConfig.setPartial(headSlice.getStart(), headSlice.getLength(), headSlice.getFileDataType());
FileReader headReader = FileFactory.createReader(headConfig);
try {
    Map<String, Object> head = headReader.readHead(HashMap.class);
    System.out.println(head);
} finally {
    // Always close the reader, even if reading fails.
    headReader.close();
}

// Get the body (data content) slice.
FileSlice bodySlice = splitter.getBodySlice(config);
// Read the body slice.
FileConfig bodyConfig = config.clone();
bodyConfig.setPartial(bodySlice.getStart(), bodySlice.getLength(), bodySlice.getFileDataType());
FileReader bodyReader = FileFactory.createReader(bodyConfig);
try {
    Map<String, Object> row = null;
    // The loop stops at the first null returned by readRow.
    while (null != (row = bodyReader.readRow(HashMap.class))) {
        System.out.println(row);
    }
} finally {
    bodyReader.close();
}

// Get the tail slice.
FileSlice tailSlice = splitter.getTailSlice(config);
// Read the tail slice.
FileConfig tailConfig = config.clone();
tailConfig.setPartial(tailSlice.getStart(), tailSlice.getLength(), tailSlice.getFileDataType());
FileReader tailReader = FileFactory.createReader(tailConfig);
try {
    Map<String, Object> tail = tailReader.readTail(HashMap.class);
    System.out.println(tail);
} finally {
    tailReader.close();
}
  2. body按大小分片
// Example: cut the body into several slices and read them one after another.

// Resolve the sample data file from the classpath.
String dataPath = File.class.getResource("/reader/sp/data/data_split.txt").getPath();
StorageConfig storage = new StorageConfig("nas");
FileConfig config = new FileConfig(dataPath, "/reader/sp/template/template3.json", storage);
// Create the slice processor for the configured storage.
FileSplitter bodySplitter = FileFactory.createSplitter(config.getStorageConfig());
// Split the body by size; 256 is the size argument (unit not shown in this example).
List<FileSlice> bodySlices = bodySplitter.getBodySlices(config, 256);
System.out.println(bodySlices.size());
// Read the data slice by slice, each through its own cloned config and reader.
for (FileSlice part : bodySlices) {
    FileConfig partConfig = config.clone();
    partConfig.setPartial(part.getStart(), part.getLength(), part.getFileDataType());
    FileReader partReader = FileFactory.createReader(partConfig);
    try {
        // Keep reading until readRow returns null.
        for (Map<String, Object> row = partReader.readRow(HashMap.class);
                row != null;
                row = partReader.readRow(HashMap.class)) {
            System.out.println(row);
        }
    } finally {
        // Release the reader whether or not reading succeeded.
        partReader.close();
    }
}
Clone this wiki locally