Loading a TensorFlow model in Java (created in Python with DNNClassifier)

The goal is to use, from Java, a model that was created and trained in Python with tensorflow.contrib.learn.DNNClassifier.

At the moment, the main problem is knowing the names of the tensors to pass to the session runner method in Java.

I have this test code in Python:

    from __future__ import division, print_function, absolute_import
    import tensorflow as tf
    import pandas as pd
    import tensorflow.contrib.learn as learn
    import numpy as np
    from sklearn import metrics
    from sklearn.cross_validation import train_test_split
    from tensorflow.contrib import layers
    from tensorflow.contrib.learn.python.learn.utils import input_fn_utils
    from tensorflow.python.ops import array_ops
    from tensorflow.python.framework import dtypes
    from tensorflow.python.util.compat import as_text

    print(tf.VERSION)

    df = pd.read_csv('../NNNormalizeData-out.csv')

    inputs = []
    target = []
    y = 0
    for x in df.columns:
        if y != 35:
            #print("added %d" % y)
            inputs.append(x)
        else:
            target.append(x)
        y += 1

    total_inputs, total_output = df.as_matrix(inputs).astype(np.float32), df.as_matrix([target]).astype(np.int32)

    train_inputs, test_inputs, train_output, test_output = train_test_split(total_inputs, total_output, test_size=0.2, random_state=42)

    feature_columns = [tf.contrib.layers.real_valued_column("", dimension=train_inputs.shape[1], dtype=tf.float32)]
    #target_column = [tf.contrib.layers.real_valued_column("output", dimension=train_output.shape[1])]

    classifier = learn.DNNClassifier(hidden_units=[10, 20, 5], n_classes=5, feature_columns=feature_columns)
    classifier.fit(train_inputs, train_output, steps=100)

    #Save Model into saved_model.pbtxt file (possible to Load in Java)
    tfrecord_serving_input_fn = tf.contrib.learn.build_parsing_serving_input_fn(layers.create_feature_spec_for_parsing(feature_columns))
    classifier.export_savedmodel(export_dir_base="test", serving_input_fn=tfrecord_serving_input_fn, as_text=True)

    # Measure accuracy
    pred = list(classifier.predict(test_inputs, as_iterable=True))
    score = metrics.accuracy_score(test_output, pred)
    print("Final score: {}".format(score))

    # test individual samples
    sample_1 = np.array([[0.37671986791414125,0.28395908337619136,-0.0966095873607713,-1.0,0.06891621389763203,-0.09716678086712205,0.726029084013637,4.984689881073479E-4,-0.30296253267499107,-0.16192917054985334,0.04820256230479658,0.4951319883569152,0.5269983894210499,-0.2560313828048315,-0.3710980821053321,-0.4845867212612598,-0.8647234314469595,-0.6491591208322198,-1.0,-0.5004549422844073,-0.9880910165770813,0.5540293108747256,0.5625990251930839,0.7420121698556554,0.5445551415657979,0.4644276850235627,0.7316976292340245,0.636690006814346,0.16486621649984112,-0.0466018967678159,0.5261100063227044,0.6256168612312738,-0.544295484930702,0.379125782517193,0.6959368575211544]], dtype=float)
    sample_2 = np.array([[1.0,0.7982741870963959,1.0,-0.46270838239235024,0.040320274521029376,0.443451913224413,-1.0,1.0,1.0,-1.0,0.36689718911339564,-0.13577379160035796,-0.5162916256414466,-0.03373651520104648,1.0,1.0,1.0,1.0,0.786999801054777,-0.43856035121103853,-0.8199093927945158,1.0,-1.0,-1.0,-0.1134921695894473,-1.0,0.6420892436196663,0.7871737734493178,1.0,0.6501788845358409,1.0,1.0,1.0,-0.17586627413625022,0.8817194210401085]], dtype=float)

    pred = list(classifier.predict(sample_2, as_iterable=True))
    print("Prediction for sample_1 is:{} ".format(pred))

    pred = list(classifier.predict_proba(sample_2, as_iterable=True))
    print("Prediction for sample_2 is:{} ".format(pred))

This creates a saved_model.pbtxt file.

I tried to load this model in Java with the following code:

    import java.nio.FloatBuffer;

    import org.tensorflow.SavedModelBundle;
    import org.tensorflow.Session;
    import org.tensorflow.Tensor;

    public class HelloTF {
        public static void main(String[] args) throws Exception {
            SavedModelBundle bundle = SavedModelBundle.load("/java/workspace/APIJavaSampleCode/tfModels/dnn/ModelSave", "serve");
            Session s = bundle.session();

            double[] inputDouble = {1.0,0.7982741870963959,1.0,-0.46270838239235024,0.040320274521029376,0.443451913224413,-1.0,1.0,1.0,-1.0,0.36689718911339564,-0.13577379160035796,-0.5162916256414466,-0.03373651520104648,1.0,1.0,1.0,1.0,0.786999801054777,-0.43856035121103853,-0.8199093927945158,1.0,-1.0,-1.0,-0.1134921695894473,-1.0,0.6420892436196663,0.7871737734493178,1.0,0.6501788845358409,1.0,1.0,1.0,-0.17586627413625022,0.8817194210401085};

            // Convert the 35 inputs to float.
            float[] inputfloat = new float[inputDouble.length];
            for (int i = 0; i < inputDouble.length; i++) {
                inputfloat[i] = (float) inputDouble[i];
            }

            // Feed one sample of 35 features and fetch the 5 class probabilities.
            Tensor inputTensor = Tensor.create(new long[] {1, 35}, FloatBuffer.wrap(inputfloat));
            Tensor result = s.runner()
                    .feed("input_example_tensor", inputTensor)
                    .fetch("dnn/multi_class_head/predictions/probabilities")
                    .run().get(0);

            // Copy the probabilities out of the result tensor and take the argmax.
            float[][] m = new float[1][5];
            float[] vector = result.copyTo(m)[0];
            float maxVal = 0;
            int inc = 0;
            int predict = -1;
            for (float val : vector) {
                System.out.println(val);
                if (val > maxVal) {
                    predict = inc;
                    maxVal = val;
                }
                inc++;
            }
            System.out.println(predict);
        }
    }

I get the error on the .run().get(0); line:

    Exception in thread "main" org.tensorflow.TensorFlowException: Output 0 of type float does not match declared output type string for node _recv_input_example_tensor_0 = _Recv[_output_shapes=[[-1]], client_terminated=true, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=3663984897684684554, tensor_name="input_example_tensor:0", tensor_type=DT_STRING, _device="/job:localhost/replica:0/task:0/cpu:0"]()
        at org.tensorflow.Session.run(Native Method)
        at org.tensorflow.Session.access$100(Session.java:48)
        at org.tensorflow.Session$Runner.runHelper(Session.java:285)
        at org.tensorflow.Session$Runner.run(Session.java:235)
        at tensorflow.HelloTF.main(HelloTF.java:35)

OK, I finally solved it: the main problem was that the input name to use in Java is "dnn/input_from_feature_columns/input_from_feature_columns/concat" and not "input_example_tensor".

I discovered this by navigating the graph in TensorBoard: tensorboard --logdir=D:\python\Workspace\Autoencoder\src\dnn\ModelSave
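
As an alternative to TensorBoard, the operation names can also be listed directly from the loaded SavedModelBundle on the Java side. A minimal sketch (using the same model path as above):

    import java.util.Iterator;

    import org.tensorflow.Graph;
    import org.tensorflow.Operation;
    import org.tensorflow.SavedModelBundle;

    public class ListOps {
        public static void main(String[] args) {
            // Walk the imported graph and print every operation name and type,
            // so the feed/fetch names can be found without TensorBoard.
            try (SavedModelBundle bundle = SavedModelBundle.load(
                    "/java/workspace/APIJavaSampleCode/tfModels/dnn/ModelSave", "serve")) {
                Graph g = bundle.graph();
                Iterator<Operation> ops = g.operations();
                while (ops.hasNext()) {
                    Operation op = ops.next();
                    System.out.println(op.name() + " : " + op.type());
                }
            }
        }
    }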

Here is the Java code:

    import java.nio.FloatBuffer;

    import org.tensorflow.SavedModelBundle;
    import org.tensorflow.Session;
    import org.tensorflow.Tensor;

    public class HelloTF {
        public static void main(String[] args) throws Exception {
            SavedModelBundle bundle = SavedModelBundle.load("/java/workspace/APIJavaSampleCode/tfModels/dnn/ModelSave", "serve");
            Session s = bundle.session();

            double[] inputDouble = {1.0,0.7982741870963959,1.0,-0.46270838239235024,0.040320274521029376,0.443451913224413,-1.0,1.0,1.0,-1.0,0.36689718911339564,-0.13577379160035796,-0.5162916256414466,-0.03373651520104648,1.0,1.0,1.0,1.0,0.786999801054777,-0.43856035121103853,-0.8199093927945158,1.0,-1.0,-1.0,-0.1134921695894473,-1.0,0.6420892436196663,0.7871737734493178,1.0,0.6501788845358409,1.0,1.0,1.0,-0.17586627413625022,0.8817194210401085};

            // Convert the 35 inputs to float.
            float[] inputfloat = new float[inputDouble.length];
            for (int i = 0; i < inputDouble.length; i++) {
                inputfloat[i] = (float) inputDouble[i];
            }

            // Feed the concat node found in TensorBoard and fetch the 5 class probabilities.
            Tensor inputTensor = Tensor.create(new long[] {1, 35}, FloatBuffer.wrap(inputfloat));
            Tensor result = s.runner()
                    .feed("dnn/input_from_feature_columns/input_from_feature_columns/concat", inputTensor)
                    .fetch("dnn/multi_class_head/predictions/probabilities")
                    .run().get(0);

            // Copy the probabilities out of the result tensor and take the argmax.
            float[][] m = new float[1][5];
            float[] vector = result.copyTo(m)[0];
            float maxVal = 0;
            int inc = 0;
            int predict = -1;
            for (float val : vector) {
                System.out.println(val);
                if (val > maxVal) {
                    predict = inc;
                    maxVal = val;
                }
                inc++;
            }
            System.out.println(predict);
        }
    }

I tested the output:

Python side:

    Prediction for sample_2 is:[3]
    Prediction for sample_2 is:[array([ 0.17157166,  0.24475774,  0.16158019,  0.24648622,  0.17560424], dtype=float32)]

Java side:

    0.17157166
    0.24475774
    0.16158019
    0.24648622
    0.17560424
    3
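
One optional refinement to the working code above (just a sketch, not part of the original solution): SavedModelBundle, Session and Tensor are AutoCloseable in the Java API, so the native resources can be released with try-with-resources. This fragment reuses the inputfloat array from the code above:

    // Sketch: same feed/fetch as above, with explicit resource cleanup.
    try (SavedModelBundle bundle = SavedModelBundle.load(
                 "/java/workspace/APIJavaSampleCode/tfModels/dnn/ModelSave", "serve");
         Tensor inputTensor = Tensor.create(new long[] {1, 35}, FloatBuffer.wrap(inputfloat))) {
        try (Tensor result = bundle.session().runner()
                .feed("dnn/input_from_feature_columns/input_from_feature_columns/concat", inputTensor)
                .fetch("dnn/multi_class_head/predictions/probabilities")
                .run().get(0)) {
            float[] probabilities = result.copyTo(new float[1][5])[0];
            // ... argmax over probabilities as before ...
        }
    }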

The error message gives a clue: the tensor named "input_example_tensor" in the model expects string data, whereas you fed it float values.

Judging by the name of the tensor and your code, I'd guess that the tensor you are feeding is defined in input_fn_utils.py. This tensor is passed to the tf.parse_example() op, which expects a vector of tf.train.Example protocol buffers serialized as strings.
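
For illustration, a hedged sketch of what this looks like on the Java side: building a tf.train.Example with the protobuf classes generated in org.tensorflow.example (shipped in TensorFlow's proto Java artifact) and serializing it to bytes. The empty-string feature key is an assumption that mirrors the unnamed real_valued_column("") in the Python code above; adjust it to whatever feature names your serving input fn actually parses:

    import org.tensorflow.example.Example;
    import org.tensorflow.example.Feature;
    import org.tensorflow.example.Features;
    import org.tensorflow.example.FloatList;

    public class ExampleSerializer {
        // Pack one row of features into a tf.train.Example, which is what the
        // parsing serving input fn behind "input_example_tensor" expects.
        static byte[] serializeExample(float[] values) {
            FloatList.Builder floatList = FloatList.newBuilder();
            for (float v : values) {
                floatList.addValue(v);
            }
            Example example = Example.newBuilder()
                    .setFeatures(Features.newBuilder()
                            .putFeature("", Feature.newBuilder().setFloatList(floatList).build()))
                    .build();
            return example.toByteArray();
        }
    }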

On TensorFlow 1.1, I got an error when feed("input_example_tensor", inputTensor) was left out.

But I found that an example.proto (tf.train.Example) can be fed as "input_example_tensor", although it took a lot of time to figure out how to create string tensors for serialized protocol buffers.

This is how I created the inputTensor:

    org.tensorflow.example.Example.Builder example = org.tensorflow.example.Example.newBuilder();
    /* set some features to example... */

    Tensor exampleTensor = Tensor.create(example.build().toByteArray());
    // Here, the shape of exampleTensor is not specified yet.

    // Set the shape to feed this as "input_example_tensor"
    Graph g = bundle.graph();
    Output examplePlaceholder =
            g.opBuilder("Placeholder", "example")
                    .setAttr("dtype", exampleTensor.dataType())
                    .build().output(0);
    Tensor shapeTensor = Tensor.create(new long[]{1}, IntBuffer.wrap(new int[]{1}));
    Output shapeConst =
            g.opBuilder("Const", "shape")
                    .setAttr("dtype", shapeTensor.dataType())
                    .setAttr("value", shapeTensor)
                    .build().output(0);
    Output shaped =
            g.opBuilder("Reshape", "output")
                    .addInput(examplePlaceholder)
                    .addInput(shapeConst)
                    .build().output(0);
    Tensor inputTensor = s.runner().feed(examplePlaceholder, exampleTensor).fetch(shaped).run().get(0);
    // Now, inputTensor has shape of [1] and ready to feed.
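
Depending on the Java API version, the Placeholder/Const/Reshape round-trip above may be avoidable: as far as I know, Tensor.create accepts a byte[][] and yields a DT_STRING tensor of shape [1] directly. A short sketch under that assumption, reusing the example builder from the block above:

    // Assumption: Tensor.create(Object) maps byte[][] to a DT_STRING vector,
    // so one serialized tf.train.Example becomes a string tensor of shape [1].
    byte[] serialized = example.build().toByteArray();
    Tensor inputTensor = Tensor.create(new byte[][] { serialized });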

The parameters of .feed() and .fetch() should match your input and output data types.

You can check the saved_model.pbtxt file for details about your parameters and their input/output types.
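
The declared types can also be queried programmatically from the loaded graph in Java instead of reading the .pbtxt by hand. A small sketch (the op names are the ones fed and fetched in the code below):

    // Sketch: print the output dtypes of the feed and fetch ops, to confirm
    // they match the tensors passed from Java.
    Graph g = bundle.graph();
    for (String name : new String[] {
            "ParseExample/ParseExample",
            "dnn/binary_logistic_head/predictions/probabilities"}) {
        Operation op = g.operation(name);
        for (int i = 0; i < op.numOutputs(); i++) {
            System.out.println(name + ":" + i + " -> " + op.output(i).dataType());
        }
    }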

For example,

My Java code:

    Tensor result = s.runner()
            .feed("ParseExample/ParseExample", inputTensor)
            .fetch("dnn/binary_logistic_head/predictions/probabilities")
            .run().get(0);

My saved_model.pbtxt (partial content):

    node {
      name: "ParseExample/ParseExample"
      op: "ParseExample"
      input: "input_example_tensor"
      input: "ParseExample/ParseExample/names"
      input: "ParseExample/ParseExample/dense_keys_0"
      input: "ParseExample/Const"
      attr {
        key: "Ndense"
        value { i: 1 }
      }
      attr {
        key: "Nsparse"
        value { i: 0 }
      }
      attr {
        key: "Tdense"
        value { list { type: DT_FLOAT } }
      }
      attr {
        key: "_output_shapes"
        value { list { shape { dim { size: -1 } dim { size: 2 } } } }
      }
      attr {
        key: "dense_shapes"
        value { list { shape { dim { size: 2 } } } }
      }
      attr {
        key: "sparse_types"
        value { list { } }
      }
    }

    outputs {
      key: "scores"
      value {
        name: "dnn/binary_logistic_head/predictions/probabilities:0"
        dtype: DT_FLOAT
        tensor_shape {
          dim { size: -1 }
          dim { size: 2 }
        }
      }
    }

They are both compatible with my data type, float.