1. 程式人生 > >hive UDF 開發示例

hive UDF 開發示例

一、建立一個java專案

對應的pom檔案

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.credithc</groupId>
    <artifactId>hive_udf_v1.0</artifactId>
    <version>1.0-SNAPSHOT</version>


    <!-- Build parameters; adjust the versions to match the target hadoop/hive cluster. -->
    <properties>
        <!-- The UDF source contains non-ASCII regex literals, so pin the source encoding
             instead of relying on the platform default. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <maven-compiler-plugin.version>3.7.0</maven-compiler-plugin.version>
        <maven-resources-plugin.version>2.4.3</maven-resources-plugin.version>
    </properties>

    <!-- The Cloudera repository is required to download CDH builds of hadoop and hive. -->
    <!-- It is not needed when using the Apache releases of hadoop and hive. -->
    <repositories>
        <repository>
            <id>cloudera</id>
            <!-- https: Maven 3.8.1+ blocks plain-http external repositories by default. -->
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
    </repositories>
    <dependencies>
        <!-- Dependencies are resolved from the repositories configured above. -->
        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch-hadoop-mr</artifactId>
            <version>5.6.3</version>
        </dependency>

        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch-hadoop-hive</artifactId>
            <version>5.6.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>2.0.0</version>
        </dependency>
        <!-- junit is the Java unit-testing framework -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.10</version>
            <scope>test</scope>
        </dependency>

    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>${maven-compiler-plugin.version}</version>
                <configuration>
                    <source>${maven.compiler.source}</source>
                    <target>${maven.compiler.target}</target>
                </configuration>
            </plugin>
            <!-- maven-resources-plugin is a build plugin, so it is declared here
                 rather than as a project dependency. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-resources-plugin</artifactId>
                <version>${maven-resources-plugin.version}</version>
            </plugin>
        </plugins>
    </build>
</project>

二、UDF函式建立開發:

package com.credithc.rc.kg.udf;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Created by glin on 2018/11/1 0001.    com.credithc.rc.kg.udf.MessageDecodeUdf
 */
public class MessageDecodeUdf extends UDF{

    public MessageDecodeUdf(){
    }

    public String evaluate(String str,String params) {
        if(StringUtils.isEmpty(str)||StringUtils.isEmpty(params))
            return null;
        String re = null;
        try {
            switch (params) {
                //時間抽取
                case "time":
                    re = parserTime(str);
                    break;
                //銀行名稱抽取
                case "bankName":
                    re = parserBankName(str);
                    break;
            }
        }catch (Exception e){

        }
        return re;
    }

    public String parserTime(String str){
        //提取時間
        Pattern p0 =Pattern.compile("\\d{4}年\\d{1,2}月\\d{1,2}日|\\d{1,2}月\\d{1,2}日|\\d{4}[-|/|.]\\d{1,2}[-|/|.]\\d{1,2}");
        //時間匹配
        Matcher m0 = p0.matcher(str);
        if(m0.find()){
            return m0.group(0);
        }else{
            return null;
        }
    }


    public String parserBankName(String str){
        //提取[]裡的內容
        Pattern p1 = Pattern.compile("\\[(.+?銀行)\\]");
        //銀行
        Matcher m1 = p1.matcher(str);
        if(m1.find()){
            return m1.group(1);
        }else{
            return null;
        }
    }




    }

public static void main(String[] args) {
        MessageDecodeUdf dd = new MessageDecodeUdf();
        System.out.println(dd.evaluate(" 。下載“中國建設銀行”手機銀行APP 。[建設銀行]", "time"));

    }
}

測試執行結果:

三、匯出 jar包:

四、上傳hive測試:

找到該函式,滑鼠右鍵選擇Copy Reference 獲得該函式的全路徑(需與原始碼中的 package 宣告一致):com.credithc.rc.kg.udf.MessageDecodeUdf

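-- 先將jar包上傳到HDFS(注意目標路徑需以 / 開頭,否則會被解析為目前使用者家目錄下的相對路徑)

hdfs dfs -put /home/sd/test/hive_udf_v1.0-1.0-SNAPSHOT.jar /user/sd/hive_udf/

-- 在hive中將上傳的jar包匯入到classpath變數裡

add jar hdfs:///user/sd/hive_udf/hive_udf_v1.0-1.0-SNAPSHOT.jar;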

list jars; -- 檢視匯入的jar包

create temporary function message_udf as 'com.credithc.rc.kg.udf.MessageDecodeUdf'; -- 建立一個臨時函式,關聯該jar包

使用測試:

select  message_udf ( str, params) from kkkk;