
Lesson 76: Spark SQL in Practice: Importing User Logs into Hive and Computing PV with SQL

Contents:

    1. Importing data into Hive
    2. Hands-on data operations with Spark SQL

1. Importing Data into Hive

Create the table in Hive, then load the local log file. Note that the ROW FORMAT clause belongs on CREATE TABLE, and LOAD DATA needs the target table name; `date` and `timestamp` are backticked because they are keywords in Hive:

create table userLogs(`date` string, `timestamp` bigint, userID bigint, pageID bigint, channel string, action string)
row format delimited fields terminated by '\t' lines terminated by '\n';

load data local inpath '/home/hadoop/learnSpark/SparkSQLDataManually/userLogs.log' into table userLogs;
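
As a quick sanity check, query the table from the Hive CLI to confirm the tab delimiter matched the file's layout. This is a minimal sketch; the sample line in the comment is hypothetical, and the actual values depend on the log file produced by the SparkSQLDataManually generator:

-- A log line is expected to be tab-separated, matching the schema, e.g. (hypothetical):
-- 2018-11-10	1541862000000	355	71	someChannel	view
select * from userLogs limit 5;    -- columns should be populated, not NULL, if the delimiter matched
select count(*) from userLogs;     -- total number of imported records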

2. Hands-on Data Operations with Spark SQL

package SparkSQL;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.hive.HiveContext;

import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

/**
 * FileName: SparkSQLUserLogsOps
 * Author:   hadoop
 * Email:    [email protected]
 * Date:     18-11-12 10:19 PM
 * Description:
 */
public class SparkSQLUserLogsOps {
    public static void main(String[] args) {
        // Create SparkConf to read system properties and set the application name
        SparkConf conf = new SparkConf().setAppName("SparkSQLUserLogsOps").setMaster("spark://Master:7077");
        // Create the JavaSparkContext instance, the cornerstone of the whole Driver
        JavaSparkContext sc = new JavaSparkContext(conf);
        // Set the log output level
        sc.setLogLevel("INFO");
        // Create the HiveContext, used for SQL analysis against Hive
        HiveContext hiveContext = new HiveContext(sc.sc());
        String twoDaysAgo = getTwoDaysAgo();
        pvStatistic(hiveContext, twoDaysAgo);
    }

    private static void pvStatistic(HiveContext hiveContext, String twoDaysAgo) {
        hiveContext.sql("use hive");
        // Concatenate the computed date into the query so the filter matches actual rows
        String sqlText = "select date,pageID,pv "
                + "from (select date,pageID,count(*) pv from userlogs "
                + "where action = 'view' and date = '" + twoDaysAgo + "' "
                + "group by date,pageID) subquery "
                + "order by pv desc limit 10";
        hiveContext.sql(sqlText).show();
    }

    // Format the date of two days ago as yyyy-MM-dd, matching the date column in the logs
    private static String getTwoDaysAgo() {
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
        Calendar calendar = Calendar.getInstance();
        calendar.setTime(new Date());
        calendar.add(Calendar.DATE, -2);
        Date twoDaysAgo = calendar.getTime();
        return dateFormat.format(twoDaysAgo);
    }
}
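
The PV query the job submits can also be tried directly in the Hive CLI before running the Spark application. This is a minimal sketch, assuming the table lives in a database named hive as in the code above; substitute a date that actually appears in your data (the value '2018-11-10' below is hypothetical):

use hive;
select `date`, pageID, pv
from (select `date`, pageID, count(*) pv
      from userlogs
      where action = 'view' and `date` = '2018-11-10'
      group by `date`, pageID) subquery
order by pv desc
limit 10;

The inner query counts 'view' actions per page for the chosen day, and the outer query keeps the ten pages with the highest PV, which is exactly the statement pvStatistic builds for the computed two-days-ago date.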