# HIVE 第五章 查询

1.query

hive> SELECT name, subordinates[0] FROM employees;

John Doe Mary Smith

Mary Smith Bill King

Todd Jones NULL

2.expression

hive> SELECT upper(name), salary, deductions["Federal Taxes"],

round(salary * (1 - deductions["Federal Taxes"])) FROM employees;

3.expression

SELECT count(*), avg(salary) FROM employees;

4.distinct

SELECT count(DISTINCT symbol) FROM stocks;

5.limit

hive> SELECT upper(name), salary, deductions["Federal Taxes"],

> round(salary * (1 - deductions["Federal Taxes"])) FROM employees

> LIMIT 2;

JOHN DOE 100000.0 0.2 80000

MARY SMITH 80000.0 0.2 64000

6.列名 别名

SELECT upper(name), salary, deductions["Federal Taxes"] as fed_taxes,

> round(salary * (1 - deductions["Federal Taxes"])) as

salary_minus_fed_taxes

> FROM employees LIMIT 2;

7.嵌套select （不可有having在内部select）

hive> FROM (

> SELECT upper(name), salary, deductions["Federal Taxes"] as fed_taxes,

> round(salary * (1 - deductions["Federal Taxes"])) as

salary_minus_fed_taxes

> FROM employees

> ) e

> SELECT e.name, e.salary_minus_fed_taxes

> WHERE e.salary_minus_fed_taxes > 70000;

JOHN DOE 100000.0 0.2 80000

8. case when then

hive> SELECT name, salary,

> CASE

> WHEN salary < 50000.0 THEN 'low'

> WHEN salary >= 50000.0 AND salary < 70000.0 THEN 'middle'

> WHEN salary >= 70000.0 AND salary < 100000.0 THEN 'high'

> ELSE 'very high'

> END AS bracket FROM employees;

John Doe 100000.0 very high

Mary Smith 80000.0 high

Todd Jones 70000.0 high

Bill King 60000.0 middle

Boss Man 200000.0 very high

9.hive不适用map reduce

SELECT * FROM employees;

SELECT * FROM employees

WHERE country = 'US' AND state = 'CA'

LIMIT 100;

10.使用like和rlick

like是如同sql语句

'%Chi%';

rlick可以使用如同java的正则

> FROM employees WHERE address.street RLIKE '.*(Chicago|Ontario).*';

Mary Smith 100 Ontario St.

Todd Jones 200 Chicago Ave.

11 group by语句

hive> SELECT year(ymd), avg(price_close) FROM stocks

> WHERE exchange = 'NASDAQ' AND symbol = 'AAPL'

> GROUP BY year(ymd);

1984 25.578625440597534

12 having语句

hive> SELECT year(ymd), avg(price_close) FROM stocks

> WHERE exchange = 'NASDAQ' AND symbol = 'AAPL'

> GROUP BY year(ymd)

> HAVING avg(price_close) > 50.0;

1987 53.88968399108163

1991 52.49553383386182

13 join inner（建议将最大的table放在最后）

hive> SELECT a.ymd, a.price_close, b.price_close

> FROM stocks a JOIN stocks b ON a.ymd = b.ymd

> WHERE a.symbol = 'AAPL' AND b.symbol = 'IBM';

（注意，像比较这样的join条件是不允许的，条件中也不能用or）

SELECT a.ymd, a.price_close, b.price_close

FROM stocks a JOIN stocks b

ON a.ymd <= b.ymd

WHERE a.symbol = 'AAPL' AND b.symbol = 'IBM';

14.left outer join

hive> SELECT s.ymd, s.symbol, s.price_close, d.dividend

> FROM stocks s LEFT OUTER JOIN dividends d ON s.ymd = d.ymd AND s.symbol =

d.symbol

> WHERE s.symbol = 'AAPL';

...

1987-05-01 AAPL 80.0 NULL

1987-05-04 AAPL 79.75 NULL

1987-05-05 AAPL 80.25 NULL

HIVE 第五章 查询

• 0

开心

• 0

板砖

• 0

感动

• 0

有用

• 0

疑问

• 0

难过

• 0

无聊

• 0

震惊