中间层是元数据层。其中 Metadata File 记录了表的存储位置、Schema 演化信息、分区演化信息以及所有的 Snapshot 和 Manifest List 信息,对应的是v1.metadata.json和v2.metadata.json文件,其中v后面的数字和version-hint.text文件中的数字对应,每当新增一个 Snapshot 的时候,version-hint.text中的数字加 1,同时也会新增一个vx.metadata.json文件,比如执行insert into hadoop_catalog.iceberg_db.user_log_iceberg values ('xxxxxxxxxxxxx', 'yyyyyyyyyyyyy', cast(1640986400 as timestamp))、delete from hadoop_catalog.iceberg_db.user_log_iceberg where udt = cast(1640986400 as timestamp)之后,版本就会变成v4:
-- External table over comma-delimited, newline-terminated text files
-- located at the given local-filesystem path.
CREATE EXTERNAL TABLE external_table2 (
    column1 STRING,
    column2 INT,
    column3 DOUBLE
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION 'file:///path/to/local/file';

-- Hive file storage formats (the STORED AS clause, e.g. STORED AS TEXTFILE):
--   1. TEXTFILE
--   2. SEQUENCEFILE
--   3. RCFILE
--   4. ORCFILE (available since 0.11)
-- TEXTFILE is the default format: it is used when no format is specified at
-- table creation, and on data load the data file is copied to HDFS as-is,
-- without any processing.
创建分区表的语法类似于创建普通表,只不过要使用 PARTITIONED BY 子句指定一个或多个分区列,例如:
-- Internal table, partitioned by year and month.
-- Partition columns are declared in PARTITIONED BY, not in the column list.
CREATE TABLE partitioned_internal_table (
    id INT,
    mesg STRING
)
PARTITIONED BY (
    year INT,
    month INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
-- External table, partitioned by year and month, with its data stored
-- under the explicit LOCATION path.
CREATE EXTERNAL TABLE partitioned_external_table (
    id INT,
    mesg STRING
)
PARTITIONED BY (
    year INT,
    month INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/user/hive/partitioned_table/data';
-- Load a local CSV file into the (year=2019, month=1) partition.
LOAD DATA LOCAL INPATH './file.csv'
INTO TABLE partitioned_external_table
PARTITION (year = 2019, month = 1);

-- List the partitions of the table.
SHOW PARTITIONS partitioned_external_table;
在上述语句中,我们使用 LOAD DATA 语句将本地文件 ./file.csv 加载到 partitioned_external_table 表中,并指定了分区 year 为 2019、分区 month 为 1。
Error: Error while compiling statement: [Error 10308]: Attempt to acquire compile lock timed out. (state=,code=10308) org.apache.hive.service.cli.HiveSQLException: Error while compiling statement: [Error 10308]: Attempt to acquire compile lock timed out. at org.apache.hive.jdbc.Utils.verifySuccess(Utils.java:241) at org.apache.hive.jdbc.Utils.verifySuccessWithInfo(Utils.java:227) at org.apache.hive.jdbc.HiveStatement.execute(HiveStatement.java:255) at org.apache.hive.beeline.Commands.executeInternal(Commands.java:989) at org.apache.hive.beeline.Commands.execute(Commands.java:1180) at org.apache.hive.beeline.Commands.sql(Commands.java:1094) at org.apache.hive.beeline.BeeLine.dispatch(BeeLine.java:1180) at org.apache.hive.beeline.BeeLine.execute(BeeLine.java:1013) at org.apache.hive.beeline.BeeLine.begin(BeeLine.java:922) at org.apache.hive.beeline.BeeLine.mainWithInputRedirection(BeeLine.java:518) at org.apache.hive.beeline.BeeLine.main(BeeLine.java:501) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hadoop.util.RunJar.run(RunJar.java:226) at org.apache.hadoop.util.RunJar.main(RunJar.java:141) Caused by: org.apache.hive.service.cli.HiveSQLException: Error while compiling statement: [Error 10308]: Attempt to acquire compile lock timed out. 
at org.apache.hive.service.cli.operation.Operation.toSQLException(Operation.java:400) at org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:187) at org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:271) at org.apache.hive.service.cli.operation.Operation.run(Operation.java:337) at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:439) at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:416) at org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:282) at org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:503) at org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1313) at org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1298) at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39) at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39) at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56) at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748)