如何使用Apache Avro对JSON字符串进行Avro二进制编码?

我想把我的JSON字符串编码成Avro二进制格式。 下面是我的JSON字符串,我写了一个简单的方法来进行转换,但我不确定我的做法是否正确。

public static void main(String args[]) throws Exception{ try{ Schema schema = new Parser().parse((TestExample.class.getResourceAsStream("/3233.avsc"))); String json="{"+ " \"location\" : {"+ " \"devices\":["+ " {"+ " \"did\":\"9abd09-439bcd-629a8f\","+ " \"dt\":\"browser\","+ " \"usl\":{"+ " \"pos\":{"+ " \"source\":\"GPS\","+ " \"lat\":90.0,"+ " \"long\":101.0,"+ " \"acc\":100"+ " },"+ " \"addSource\":\"LL\","+ " \"add\":["+ " {"+ " \"val\":\"2123\","+ " \"type\" : \"NUM\""+ " },"+ " {"+ " \"val\":\"Harris ST\","+ " \"type\" : \"ST\""+ " }"+ " ],"+ " \"ei\":{"+ " \"ibm\":true,"+ " \"sr\":10,"+ " \"ienz\":true,"+ " \"enz\":100,"+ " \"enr\":10"+ " },"+ " \"lm\":1390598086120"+ " }"+ " }"+ " ],"+ " \"ver\" : \"1.0\""+ " }"+ "}"; byte[] avroByteArray = fromJsonToAvro(json,schema); } catch (Exception ex) { // log an exception } 

下面的方法将我的JSON字符串转换为Avro二进制编码 –

 /**
  * Converts a JSON document into Avro binary encoding.
  *
  * <p>The JSON text is decoded into a datum according to {@code schema} and
  * that datum is then written with Avro's binary encoder. Only the encoded
  * datum is returned; the schema itself is not part of the output.
  *
  * @param json   JSON text that conforms to {@code schema}
  * @param schema Avro schema used both to read the JSON and to write the binary
  * @return the binary-encoded datum
  * @throws Exception if the JSON cannot be decoded against the schema or
  *                   encoding fails
  */
 private static byte[] fromJsonToAvro(String json, Schema schema) throws Exception {
     // Hand the String straight to the decoder: going through json.getBytes()
     // would use the platform default charset and can corrupt non-ASCII text.
     Decoder decoder = DecoderFactory.get().jsonDecoder(schema, json);
     DatumReader<Object> reader = new GenericDatumReader<Object>(schema);
     Object datum = reader.read(null, decoder);

     GenericDatumWriter<Object> writer = new GenericDatumWriter<Object>(schema);
     ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
     Encoder encoder = EncoderFactory.get().binaryEncoder(outputStream, null);
     writer.write(datum, encoder);
     // The binary encoder buffers; flush before reading the bytes back.
     encoder.flush();
     return outputStream.toByteArray();
 }

有人能帮忙看看,告诉我这种把JSON字符串编码为Avro二进制的方式是否正确吗?

我认为OP的做法是正确的。 这样写出的只是Avro记录本身,不包含模式;只有在写成Avro数据文件时,模式才会随数据一起保存。

以下是Avro本身的一些示例(如果您正在处理文件,则非常有用)。
•从JSON到Avro: DataFileWriteTool
•从Avro到JSON: DataFileReadTool

以下是双向转换(JSON到Avro,以及Avro到JSON)的完整示例。

 @Grapes([
     @Grab(group='org.apache.avro', module='avro', version='1.7.7')
 ])
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.DataInputStream;
 import java.io.EOFException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
 import org.apache.avro.Schema;
 import org.apache.avro.generic.GenericDatumReader;
 import org.apache.avro.generic.GenericDatumWriter;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.io.DatumReader;
 import org.apache.avro.io.DatumWriter;
 import org.apache.avro.io.Decoder;
 import org.apache.avro.io.DecoderFactory;
 import org.apache.avro.io.Encoder;
 import org.apache.avro.io.EncoderFactory;
 import org.apache.avro.io.JsonEncoder;

 String schema = '''{ "type":"record", "namespace":"foo", "name":"Person", "fields":[ { "name":"name", "type":"string" }, { "name":"age", "type":"int" } ] }'''

 String json = "{" + "\"name\":\"Frank\"," + "\"age\":47" + "}"

 // Round trip: JSON -> Avro binary -> JSON must reproduce the input text.
 assert avroToJson(jsonToAvro(json, schema), schema) == json

 /**
  * Encodes every JSON record found in {@code json} as Avro binary.
  *
  * @param json      JSON text containing one or more records matching the schema
  * @param schemaStr the Avro schema as a JSON string
  * @return the binary-encoded records, written back to back without a schema
  * @throws IOException if decoding or encoding fails
  */
 public static byte[] jsonToAvro(String json, String schemaStr) throws IOException {
     Schema schema = new Schema.Parser().parse(schemaStr);
     DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
     DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
     ByteArrayOutputStream output = new ByteArrayOutputStream();

     // Pass the String directly so no platform-default charset conversion is involved.
     Decoder decoder = DecoderFactory.get().jsonDecoder(schema, json);
     Encoder encoder = EncoderFactory.get().binaryEncoder(output, null);

     while (true) {
         GenericRecord datum;
         try {
             datum = reader.read(null, decoder);
         } catch (EOFException eofe) {
             // End of input is signalled by EOFException; it terminates the loop.
             break;
         }
         writer.write(datum, encoder);
     }
     // The binary encoder buffers; flush before taking the bytes.
     encoder.flush();
     return output.toByteArray();
 }

 /**
  * Decodes back-to-back Avro binary records and renders them as compact JSON.
  *
  * @param avro      binary-encoded records (no embedded schema)
  * @param schemaStr the Avro schema the records were written with, as a JSON string
  * @return the JSON text produced by Avro's JSON encoder
  * @throws IOException if decoding or encoding fails
  */
 public static String avroToJson(byte[] avro, String schemaStr) throws IOException {
     boolean pretty = false;
     Schema schema = new Schema.Parser().parse(schemaStr);
     DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
     DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
     ByteArrayOutputStream output = new ByteArrayOutputStream();

     JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, output, pretty);
     Decoder decoder = DecoderFactory.get().binaryDecoder(new ByteArrayInputStream(avro), null);

     while (true) {
         GenericRecord datum;
         try {
             datum = reader.read(null, decoder);
         } catch (EOFException eofe) {
             // No more records in the byte array.
             break;
         }
         writer.write(datum, encoder);
     }
     encoder.flush();
     // Decode explicitly as UTF-8 instead of relying on the platform default charset.
     return new String(output.toByteArray(), StandardCharsets.UTF_8);
 }

为了完整起见,这里还有一个例子,适用于您处理的是流(Avro称之为容器文件)而不是单条记录的情况。 请注意,当您从Avro转回JSON时,无需传递模式。 这是因为模式已经包含在流中。

 @Grapes([
     @Grab(group='org.apache.avro', module='avro', version='1.7.7')
 ])
 // writes Avro as a http://avro.apache.org/docs/current/spec.html#Object+Container+Files rather than a sequence of records
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.DataInputStream;
 import java.io.EOFException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
 import org.apache.avro.Schema;
 import org.apache.avro.file.DataFileStream;
 import org.apache.avro.file.DataFileWriter;
 import org.apache.avro.generic.GenericDatumReader;
 import org.apache.avro.generic.GenericDatumWriter;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.io.DatumReader;
 import org.apache.avro.io.DatumWriter;
 import org.apache.avro.io.Decoder;
 import org.apache.avro.io.DecoderFactory;
 import org.apache.avro.io.Encoder;
 import org.apache.avro.io.EncoderFactory;
 import org.apache.avro.io.JsonEncoder;

 String schema = '''{ "type":"record", "namespace":"foo", "name":"Person", "fields":[ { "name":"name", "type":"string" }, { "name":"age", "type":"int" } ] }'''

 String json = "{" + "\"name\":\"Frank\"," + "\"age\":47" + "}"

 // Round trip through a container file; the schema travels inside the bytes.
 assert avroToJson(jsonToAvro(json, schema)) == json

 /**
  * Writes every JSON record found in {@code json} into an Avro object
  * container file held in memory.
  *
  * @param json      JSON text containing one or more records matching the schema
  * @param schemaStr the Avro schema as a JSON string
  * @return the container-file bytes
  * @throws IOException if decoding or writing fails
  */
 public static byte[] jsonToAvro(String json, String schemaStr) throws IOException {
     Schema schema = new Schema.Parser().parse(schemaStr);
     DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
     ByteArrayOutputStream output = new ByteArrayOutputStream();

     // Pass the String directly so no platform-default charset conversion is involved.
     Decoder decoder = DecoderFactory.get().jsonDecoder(schema, json);

     DataFileWriter<GenericRecord> writer =
             new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>());
     try {
         writer.create(schema, output);
         while (true) {
             GenericRecord datum;
             try {
                 datum = reader.read(null, decoder);
             } catch (EOFException eofe) {
                 // End of input is signalled by EOFException; it terminates the loop.
                 break;
             }
             writer.append(datum);
         }
         writer.flush();
     } finally {
         // Always release the writer.
         writer.close();
     }
     // Read the bytes only after the writer has been flushed and closed.
     return output.toByteArray();
 }

 /**
  * Reads an in-memory Avro object container file and renders its records as
  * compact JSON, using the schema obtained from the stream itself.
  *
  * @param avro container-file bytes
  * @return the JSON text produced by Avro's JSON encoder
  * @throws IOException if the bytes cannot be read or encoding fails
  */
 public static String avroToJson(byte[] avro) throws IOException {
     boolean pretty = false;
     DataFileStream<GenericRecord> streamReader = new DataFileStream<GenericRecord>(
             new ByteArrayInputStream(avro), new GenericDatumReader<GenericRecord>());
     try {
         Schema schema = streamReader.getSchema();
         DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
         ByteArrayOutputStream output = new ByteArrayOutputStream();
         JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, output, pretty);

         for (GenericRecord datum : streamReader) {
             writer.write(datum, encoder);
         }
         encoder.flush();
         // Decode explicitly as UTF-8 instead of relying on the platform default charset.
         return new String(output.toByteArray(), StandardCharsets.UTF_8);
     } finally {
         streamReader.close();
     }
 }

作为对Keegan回答的补充,下面这个讨论可能很有用:

http://mail-archives.apache.org/mod_mbox/avro-user/201209.mbox/%3CCALEq1Z8s1sfaAVB7YE2rpZ=v3q1V_h7Vm39h0HsOzxJ+qfQRSg@mail.gmail.com%3E

要点是:有一个专门的Json模式,你可以使用JsonReader / Writer在JSON与Avro之间来回转换。 您应该使用的Json模式在此处定义:

https://github.com/apache/avro/blob/trunk/share/schemas/org/apache/avro/data/Json.avsc

当您知道json文件的模式( {schema_file}.avsc )时,可以使用avro-tools将json文件( {input_file}.json )转换为avro文件( {output_file}.avro )。 如下所示:

 java -jar the/path/of/avro-tools-1.8.1.jar fromjson {input_file}.json --schema-file {schema_file}.avsc > {output_file}.avro 

顺便说一句, {schema_file}.avsc文件的内容如下:

 {"type": "record", "name": "User", "fields": [ {"name": "name", "type": "string"}, {"name": "favorite_number", "type": ["int", "null"]}, {"name": "favorite_color", "type": ["string", "null"]} ] } 

下载avro-tools-1.8.1

下载其他avro-tools