Hive: how do I retrieve the columns of all of a database's tables?

I want to write the equivalent of this SQL query in Hive:

    select * from information_schema.columns where table_schema='database_name';

How do I access Hive's metastore and retrieve all the columns of all the tables stored in a specific database? I know I can do it table by table with describe [table_name], but is there a single query that returns all the columns of all the tables in a database?
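For reference, the per-table approach looks like this (the database and table names are placeholders):

    DESCRIBE database_name.table_name;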

If you want to be able to run such queries against Hive's metadata, you can set up the Hive metastore on MySQL; the metadata Hive uses is then stored under a dedicated MySQL account.
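(Typically this means pointing javax.jdo.option.ConnectionURL in hive-site.xml at a MySQL database, together with javax.jdo.option.ConnectionDriverName, javax.jdo.option.ConnectionUserName and javax.jdo.option.ConnectionPassword.)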

You have to create a MySQL user for Hive:

    CREATE USER 'hive'@'metastorehost' IDENTIFIED BY 'mypassword';
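That user also needs access to the metastore schema; a minimal sketch, assuming the metastore database is named metastore (adjust to your installation):

    -- 'metastore' is an assumed database name; check your hive-site.xml
    GRANT ALL PRIVILEGES ON metastore.* TO 'hive'@'metastorehost';
    FLUSH PRIVILEGES;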

There you will then find tables such as COLUMNS_V2 that contain the information you are looking for.

An example query retrieving all the columns of all the tables could be:

    SELECT COLUMN_NAME, TBL_NAME
    FROM COLUMNS_V2 c
    JOIN TBLS a ON c.CD_ID = a.TBL_ID;
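Note that in the standard metastore schema COLUMNS_V2 is keyed by CD_ID, which reaches TBLS through the SDS table rather than directly. A sketch of that join, restricted to one database (the table and column names follow the usual metastore layout, so verify them against your metastore version):

    SELECT d.NAME AS db_name, t.TBL_NAME, c.COLUMN_NAME, c.TYPE_NAME
    FROM DBS d
    JOIN TBLS t ON t.DB_ID = d.DB_ID
    JOIN SDS s ON s.SD_ID = t.SD_ID
    JOIN COLUMNS_V2 c ON c.CD_ID = s.CD_ID
    WHERE d.NAME = 'database_name'
    ORDER BY t.TBL_NAME, c.INTEGER_IDX;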

Alternatively, you can reach this information through REST calls to WebHCat; see the wiki for more information.
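(For example, WebHCat's DDL resource, served on port 50111 by default, exposes GET /templeton/v1/ddl/database/{db}/table/{table}, whose JSON response includes the table's columns; you would still call it once per table.)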

How do I access Hive's metastore and retrieve all the columns of all the tables stored in a specific database?

Here is a way to do it by connecting with HiveMetaStoreClient; the getTableColumnsInformation method below retrieves the columns.

Within the same class you can also extract all the other information, such as partitions. See the example client and the example methods.

    import org.apache.hadoop.hive.conf.HiveConf;

    // test program
    public class Test {
        public static void main(String[] args) {
            HiveConf hiveConf = new HiveConf();
            hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
            hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://host:port");
            HiveMetaStoreConnector hiveMetaStoreConnector = new HiveMetaStoreConnector(hiveConf);
            // getAllPartitionInfo expects a database name
            System.out.print(hiveMetaStoreConnector.getAllPartitionInfo("dbname"));
        }
    }

    // define a class like this
    import com.google.common.base.Joiner;
    import com.google.common.collect.Lists;
    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
    import org.apache.hadoop.hive.metastore.api.FieldSchema;
    import org.apache.hadoop.hive.metastore.api.MetaException;
    import org.apache.hadoop.hive.metastore.api.Partition;
    import org.apache.thrift.TException;
    import org.joda.time.DateTime;

    import java.util.Arrays;
    import java.util.List;

    public class HiveMetaStoreConnector {
        private HiveConf hiveConf;
        HiveMetaStoreClient hiveMetaStoreClient;

        public HiveMetaStoreConnector(String msAddr, String msPort) {
            try {
                hiveConf = new HiveConf();
                hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, msAddr + ":" + msPort);
                hiveMetaStoreClient = new HiveMetaStoreClient(hiveConf);
            } catch (MetaException e) {
                e.printStackTrace();
                System.err.println("Constructor error");
                System.err.println(e.toString());
                System.exit(-100);
            }
        }

        public HiveMetaStoreConnector(HiveConf hiveConf) {
            try {
                this.hiveConf = hiveConf;
                hiveMetaStoreClient = new HiveMetaStoreClient(hiveConf);
            } catch (MetaException e) {
                e.printStackTrace();
                System.err.println("Constructor error");
                System.err.println(e.toString());
                System.exit(-100);
            }
        }

        // one line per partition of every table in the database
        public String getAllPartitionInfo(String dbName) {
            List<String> res = Lists.newArrayList();
            try {
                List<String> tableList = hiveMetaStoreClient.getAllTables(dbName);
                for (String tableName : tableList) {
                    res.addAll(getTablePartitionInformation(dbName, tableName));
                }
            } catch (MetaException e) {
                e.printStackTrace();
                System.out.println("getAllPartitionInfo error");
                System.out.println(e.toString());
                System.exit(-100);
            }
            return Joiner.on("\n").join(res);
        }

        public List<String> getTablePartitionInformation(String dbName, String tableName) {
            List<String> partitionsInfo = Lists.newArrayList();
            try {
                List<Partition> partitions =
                        hiveMetaStoreClient.listPartitions(dbName, tableName, (short) 10000);
                for (Partition partition : partitions) {
                    StringBuffer sb = new StringBuffer();
                    sb.append(tableName);
                    sb.append("\t");
                    List<String> partitionValues = partition.getValues();
                    // pad to four partition columns so every row has the same width
                    if (partitionValues.size() < 4) {
                        int size = partitionValues.size();
                        for (int j = 0; j < 4 - size; j++) {
                            partitionValues.add("null");
                        }
                    }
                    sb.append(Joiner.on("\t").join(partitionValues));
                    sb.append("\t");
                    // create time is stored in seconds since the epoch
                    DateTime createDate = new DateTime((long) partition.getCreateTime() * 1000);
                    sb.append(createDate.toString("yyyy-MM-dd HH:mm:ss"));
                    partitionsInfo.add(sb.toString());
                }
            } catch (TException e) {
                e.printStackTrace();
                return Arrays.asList("error for request on " + tableName);
            }
            return partitionsInfo;
        }

        // one line per column of every table in the database
        public String getAllTableStatistic(String dbName) {
            List<String> res = Lists.newArrayList();
            try {
                List<String> tableList = hiveMetaStoreClient.getAllTables(dbName);
                for (String tableName : tableList) {
                    res.addAll(getTableColumnsInformation(dbName, tableName));
                }
            } catch (MetaException e) {
                e.printStackTrace();
                System.out.println("getAllTableStatistic error");
                System.out.println(e.toString());
                System.exit(-100);
            }
            return Joiner.on("\n").join(res);
        }

        // table name, column index, column name, type and comment, tab separated
        public List<String> getTableColumnsInformation(String dbName, String tableName) {
            try {
                List<FieldSchema> fields = hiveMetaStoreClient.getFields(dbName, tableName);
                List<String> infs = Lists.newArrayList();
                int cnt = 0;
                for (FieldSchema fs : fields) {
                    StringBuffer sb = new StringBuffer();
                    sb.append(tableName);
                    sb.append("\t");
                    sb.append(cnt);
                    sb.append("\t");
                    cnt++;
                    sb.append(fs.getName());
                    sb.append("\t");
                    sb.append(fs.getType());
                    sb.append("\t");
                    sb.append(fs.getComment());
                    infs.add(sb.toString());
                }
                return infs;
            } catch (TException e) {
                e.printStackTrace();
                System.out.println("getTableColumnsInformation error");
                System.out.println(e.toString());
                System.exit(-100);
                return null;
            }
        }
    }
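For the original question (columns rather than partitions), call getAllTableStatistic, which walks every table of the database through getTableColumnsInformation. A minimal usage sketch, where the metastore URI and database name are placeholders and the Hive metastore client plus its dependencies (Thrift, Guava, Joda-Time) are assumed to be on the classpath:

    HiveConf conf = new HiveConf();
    // 9083 is the metastore's default thrift port; adjust to your cluster
    conf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://metastorehost:9083");
    HiveMetaStoreConnector connector = new HiveMetaStoreConnector(conf);
    // prints one tab-separated line per column: table, index, name, type, comment
    System.out.println(connector.getAllTableStatistic("database_name"));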