Create DB

CREATE DATABASE <db-name>;
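A minimal sketch with a hypothetical database name; IF NOT EXISTS avoids an error if the DB already exists:

CREATE DATABASE IF NOT EXISTS hospital_db;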

Select a DB

If no DB is selected, the default DB is used.

USE <db-name>; 

Show DB and Tables

SHOW DATABASES;
SHOW TABLES;
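Both commands accept a quoted wildcard pattern, e.g. to list only tables whose names start with 'pat':

SHOW TABLES LIKE 'pat*';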

Create Managed Table

CREATE TABLE student(
	regno INT, 
	name STRING,
	cgpa INT
);
 
# Create a table for loading data from a file
CREATE TABLE patient(
	pid INT,
	pname STRING,
	drug STRING,
	gender STRING,
	tot_amt INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
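For reference, a hypothetical input file matching this layout, one comma-delimited record per line (see the LOAD DATA commands below for loading it):

1,Ramu,Para,M,250
2,Sita,Dolo,F,400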

Create External Table

# No directory with the table's name is created; the table uses the given location. Data files get numeric names like '000000_0'
CREATE EXTERNAL TABLE patext(
	pid int,
	pname string,
	drug string,
	gender string,
	amt int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/xyz/hiveext/';
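To verify the table type after creation, DESC FORMATTED reports 'Table Type: EXTERNAL_TABLE':

DESC FORMATTED patext;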
 
# Without LOCATION, a directory with the table's name is created under '/user/hive/warehouse/xyz.db/'
CREATE EXTERNAL TABLE patext1(
	pid int,
	pname string,
	drug string,
	gender string,
	amt int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
 
# Create a table using data on the local FS. Note that we must specify a directory, not a file
CREATE EXTERNAL TABLE employee(
	empno int,
	pname string,
	sal int
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION 'file:///home/ak/datasets/Test/';

Insert Data

Insert Command

INSERT INTO TABLE student VALUES(1, 'Arjun', 9);
 
INSERT INTO TABLE <table-name>
SELECT * FROM <source-table>;
 
# Overwrites existing data in the table
INSERT OVERWRITE TABLE patient1
SELECT * FROM patient
WHERE drug='Para'; 
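For contrast, a sketch assuming patient1 already exists with the same schema: INSERT INTO (without OVERWRITE) appends to the existing data instead of replacing it.

INSERT INTO TABLE patient1
SELECT * FROM patient
WHERE gender='F';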

Load Data from Local FS

LOAD DATA LOCAL INPATH '<local-file>' INTO TABLE <table-name>;
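For example, loading the patient data from a hypothetical local path:

LOAD DATA LOCAL INPATH '/home/ak/datasets/patient.csv' INTO TABLE patient;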

Load Data from HDFS

LOAD DATA INPATH '<hdfs-file>' INTO TABLE <table-name>;
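Adding the OVERWRITE keyword replaces the table's existing contents instead of appending, e.g. with a hypothetical HDFS path:

LOAD DATA INPATH '/data/patient.csv' OVERWRITE INTO TABLE patient;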

NOTE

When loading data from HDFS, the file is moved from its current location to the Hive DB location. Hence this approach is not recommended; a better alternative is to use external tables.

Sqoop Command

Used for bulk loading data from an RDBMS (refer to Hive & Sqoop for the command)

HDFS Shell

hadoop fs -put <local-file> '/hive-table-path/'

Describe Table

DESC <table-name>;
DESC EXTENDED <table-name>;
DESC FORMATTED <table-name>;

Delete Table

DROP TABLE <table-name>;

For an external table, DROP TABLE removes only the metadata; the data files remain at the table's location.

Alter Table

ALTER TABLE <table-name> RENAME TO <new-table-name>;
ALTER TABLE <table-name> ADD COLUMNS (<new-column> <datatype>);
ALTER TABLE <table-name> CHANGE <column-name> <new-column-name> <datatype>;
ALTER TABLE <table-name> SET TBLPROPERTIES('EXTERNAL'='TRUE'); # Change a managed table to an external table
ALTER TABLE <table-name> SET LOCATION 'hdfs:///tmp/newloc';

When we change the location of a Hive table, the data remains in the original location; it has to be moved manually.
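As a concrete sketch of the CHANGE syntax, renaming the patient table's tot_amt column (hypothetical new name):

ALTER TABLE patient CHANGE tot_amt total_amount INT;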

Load Parquet File Data

Data can be saved in formats like Parquet and ORC using the INSERT INTO / INSERT OVERWRITE commands.

CREATE TABLE parquet_file(
	pid int,
	pname string,
	drug string,
	gender string,
	amt int
)
STORED AS PARQUET; # No delimiters needed; DELIMITED row format does not apply to Parquet's binary layout
 
INSERT OVERWRITE TABLE parquet_file
SELECT * FROM patient;
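The same pattern works for ORC; only the STORED AS clause changes (a sketch with a hypothetical table name):

CREATE TABLE orc_file(
	pid int,
	pname string,
	drug string,
	gender string,
	amt int
)
STORED AS ORC;
 
INSERT OVERWRITE TABLE orc_file
SELECT * FROM patient;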

Hive Scripts

hive -f filename.q 
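A minimal sketch of such a script (hypothetical contents); statements run top to bottom, each terminated by a semicolon:

-- filename.q
SHOW TABLES;
SELECT * FROM patient LIMIT 10;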

Change Hive Execution Engine

SET hive.execution.engine=spark;
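Other accepted values for this property are mr (the classic MapReduce engine) and tez, e.g.:

SET hive.execution.engine=tez;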