昨天捣鼓了一天 Hive 读取 Avro 数据这件事，随便写点笔记。
{ "type" : "record", "name" : "Tweet", "namespace" : "com.miguno.avro", "fields" : [ { "name" : "username", "type" : "string", "doc" : "Name of the user account on Twitter.com" }, { "name" : "tweet", "type" : "string", "doc" : "The content of the user's Twitter message" }, { "name" : "timestamp", "type" : "long", "doc" : "Unix epoch time in seconds" } ], "doc" : "A basic schema for storing Twitter messages" }
-- External Hive table over Avro container files stored under LOCATION.
-- No column list is declared here: the AvroSerDe is expected to derive the
-- columns from the Avro schema referenced by 'avro.schema.url' below.
-- YOURUSER is a placeholder in both paths; substitute the real HDFS user directory.
CREATE EXTERNAL TABLE tweets
    COMMENT "A table backed by Avro data with the Avro schema stored in HDFS"
    ROW FORMAT SERDE
        'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
    STORED AS
        INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
        OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
    LOCATION '/user/YOURUSER/examples/input/'
    TBLPROPERTIES (
        'avro.schema.url' = 'hdfs:///user/YOURUSER/examples/schema/twitter.avsc'
    );
然后就是正常的玩法了：建好表之后，可以像查询普通 Hive 表一样查询 tweets 表。