
Vertica system management statements + Vertica real-time Kafka consumption

-- View table locks and lock types
select object_name,lock_mode,transaction_id,request_timestamp,transaction_description from locks;
select transaction_id from locks where object_name like '%servefc%';
-- View the history of executed queries
select schema_name,table_name,user_name,query_type,is_executing,query_start from query_profiles;
select * from query_profiles;
select schema_name,table_name,user_name,query_type,is_executing,query_start from query_profiles where is_executing='t';
-- Top 10 SQL statements by execution count
SELECT request,COUNT(*) FROM query_requests GROUP BY request ORDER BY COUNT(*) DESC LIMIT 10; 
-- Top 10 SQL statements by execution time
SELECT request,request_duration_ms FROM query_requests ORDER BY request_duration_ms DESC LIMIT 10; 
-- Top 10 SQL statements by memory consumption
SELECT request,memory_acquired_mb FROM query_requests WHERE memory_acquired_mb IS NOT NULL ORDER BY memory_acquired_mb DESC LIMIT 10; 
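-- In the same vein, a per-user rollup over query_requests shows who generates the load
-- (a quick sketch against the same system table as above):
SELECT user_name,COUNT(*) AS query_cnt,SUM(request_duration_ms) AS total_ms FROM query_requests GROUP BY user_name ORDER BY total_ms DESC LIMIT 10;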
-- Session management
select * from locks;                                              -- get the transaction_id values
select * from sessions where transaction_id in();                  -- plug in the transaction_id values from above; check transaction_start to tell whether the lock is stale
select CLOSE_SESSION('sessionid');                                -- plug in the session_id found above
-- View the historical operations of the session that caused the deadlock
select c.query,c.query_start 
from locks a 
left join sessions b 
on a.transaction_id=b.transaction_id 
left join query_profiles c 
on b.session_id=c.session_id 
where a.object_name like '%tb_dw_ct_cti_agent_call_list_min%';
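-- Putting the steps above together: a minimal sketch that lists the sessions currently holding
-- locks on a given table, ready to feed CLOSE_SESSION (the table pattern and the session_id
-- placeholder are examples, not fixed values):
select s.session_id, s.user_name, s.transaction_start
from locks l
join sessions s on l.transaction_id = s.transaction_id
where l.object_name like '%servefc%';
-- select CLOSE_SESSION('<session_id from above>');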
-- Resource pools
select * from RESOURCE_POOLS;
-- License compliance status
SELECT GET_COMPLIANCE_STATUS();
-- Create the ODM user resource pool, mainly for external-table queries
create resource pool pool_noas_odm MAXMEMORYSIZE '50%' EXECUTIONPARALLELISM AUTO PRIORITY 0 QUEUETIMEOUT NONE PLANNEDCONCURRENCY 12;
-- Create the DW user resource pool, mainly for large-table loads, joins, and aggregation
create resource pool pool_noas_dw MAXMEMORYSIZE '90%' EXECUTIONPARALLELISM AUTO PRIORITY 0 QUEUETIMEOUT NONE PLANNEDCONCURRENCY AUTO;
-- Create the APP user resource pool, mainly for queries, table joins, and metric computation
create resource pool pool_noas_app MAXMEMORYSIZE '80%' EXECUTIONPARALLELISM AUTO PRIORITY 0 QUEUETIMEOUT NONE PLANNEDCONCURRENCY 24;
CREATE USER "user_dw"     WITH PASSWORD 'cmcc';
-- Resource pool example: low-concurrency large queries
CREATE RESOURCE POOL l_pool QUEUETIMEOUT NONE PLANNEDCONCURRENCY 6 MAXCONCURRENCY 4;
-- Resource pool example: high-concurrency small queries
CREATE RESOURCE POOL s_pool MEMORYSIZE '1G' EXECUTIONPARALLELISM 4 PRIORITY 10 QUEUETIMEOUT NONE PLANNEDCONCURRENCY 36 MAXCONCURRENCY 50;
GRANT ALL ON RESOURCE POOL l_pool TO user_dw;
ALTER USER user_dw RESOURCE POOL l_pool;
GRANT ALL ON SCHEMA DW TO user_dw;
-- Create users
create user "dev_noas_odm" identified by 'noas_odm' resource pool pool_noas_odm;
create user "dev_noas_dw" identified by 'noas_dw' resource pool pool_noas_dw;
create user "dev_noas_app" identified by 'noas_app' resource pool pool_noas_app;
-- Create schemas
create schema if not exists noas.noas_odm authorization dev_noas_odm;
create schema if not exists noas.noas_dw authorization dev_noas_dw;
create schema if not exists noas.noas_app authorization dev_noas_app;
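-- To verify the setup took effect, two quick checks (a sketch against the standard
-- v_catalog.users and v_monitor.resource_pool_status system tables):
select user_name, resource_pool from users where user_name like 'dev_noas%';
select pool_name, memory_inuse_kb, running_query_count from resource_pool_status;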

select start_timestamp,
    request_id,
    statement_id,
    request_type,
    substr(request,1,85),
    request_duration_ms 
from query_requests 
    where request like '%prov_code as 220%' 
    and request_type ='LOAD' 
order by start_timestamp desc limit 10;

SELECT
    stream_name,
    schema_name,
    table_name,
    is_executing,
    accepted_row_count,
    rejected_row_count,
    DATEDIFF('ss',load_start::TIMESTAMP,GETDATE()::TIMESTAMP) AS DurationSec,
    ROUND((accepted_row_count+rejected_row_count)/NULLIF(DATEDIFF('ss',load_start::TIMESTAMP,GETDATE()::TIMESTAMP),0),3.0) AS RowsPerSec
FROM
    load_streams
WHERE
    is_executing='true';

	
-- Show how a table was created (its DDL)
select export_objects('','tb_dw_ct_tape_new_onest_day');
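-- export_objects() also accepts a destination file and a whole schema as scope, e.g. to dump
-- all DDL for a schema (the output path here is an example; the file is written on the initiator node):
select export_objects('/tmp/noas_dw_ddl.sql','noas_dw');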

select node_name,storage_path,disk_space_free_percent from disk_storage  where storage_path not ilike '%catalog%' order by disk_space_free_percent;

select table_schema,count(1) as cnt from tables where table_schema like '%csap%' or table_schema like '%huangzhan%' group by table_schema order by cnt desc;

select count(1) from tables where table_schema like '%csap%';
select substr(table_name,1,10) as prefix,count(1) as cnt from tables where table_schema like '%csap%' or table_schema like '%huangzhan%' group by 1 order by cnt desc;

select  substr(table_name,regexp_instr(table_name,'_',1,3),regexp_instr(table_name,'_',1,4)-regexp_instr(table_name,'_',1,3)) ,
count(1) cnt
from tables 
where (table_schema like '%csap%' 
or table_schema like '%huangzhan%') 
and table_name like 'tb_%'
group by substr(table_name,regexp_instr(table_name,'_',1,3),regexp_instr(table_name,'_',1,4)-regexp_instr(table_name,'_',1,3)) order by cnt desc;

-- split_part() is a simpler alternative for extracting one underscore-delimited token:
select split_part(table_name,'_',4) as token, count(1) as cnt from tables where table_schema like '%csap%' group by 1 order by cnt desc;

select count(1) from tables where table_name like '%rena%';

select sum(TABLE_SIZE_GB) from public.tb_wh_tableinfo_20180510 where  table_name like '%inre%' or table_name like '%sqm%' or table_name like '%vona%'  or table_name like '%qymn%' ;

#!/bin/bash
CurrentDir=`pwd`
VSQL='/opt/vertica/bin/vsql -Udbadmin -wvertica11'
DB_Name=CSAP_20_132
Logfile=${CurrentDir}/${DB_Name}_DB_`date "+%Y%m%d_%H%M%S"`.log
$VSQL <<EOF |tee ${Logfile}
-----------------------------echo
-----------------------------echo >>> License status
select get_compliance_status() ;
-----------------------------echo >>> Disk space check: data and catalog directory usage
select node_name, storage_path, storage_usage, rank, disk_space_free_percent from disk_storage order by node_name;
-----------------------------echo >>> Client connection versions 
select case when instr(client_label,'-')>0 then substr(client_label,1,instr(client_label,'-',1,2)) else client_label end client_label, count(*) from dc_session_starts 
group by 1 order by 2 desc;
-----------------------------echo >>> Node status 
select node_name,last_msg_from_node_at ts, node_type, node_state, node_address,catalog_path from nodes order by node_name;
-----------------------------echo >>> Catalog and data directories 
select node_name, storage_path, storage_usage, rank, disk_space_free_percent from disk_storage order by node_name;
-----------------------------echo >>> Resource pool configuration 
select name,memorysize,maxmemorysize,plannedconcurrency ,maxconcurrency ,priority ,runtimepriority ,queuetimeout,runtimecap ,cascadeto  from resource_pools;
-----------------------------echo >>> Cluster catalog size 
select node_name, max(ts) as ts, max(catalog_size_in_MB) as catalog_size_in_MB
from (select node_name,
        trunc((dc_allocation_pool_statistics_by_second."time")::TIMESTAMP,'SS'::VARCHAR(2)) AS ts,
        sum((dc_allocation_pool_statistics_by_second.total_memory_max_value - dc_allocation_pool_statistics_by_second.free_memory_min_value))/1024//1024 AS catalog_size_in_MB
      from dc_allocation_pool_statistics_by_second
      group by 1, trunc((dc_allocation_pool_statistics_by_second."time")::TIMESTAMP,'SS'::VARCHAR(2))) foo
group by 1 order by 1;
-----------------------------echo >>> Raw (uncompressed) database size 
select audit_start_timestamp ts,database_size_bytes/1024/1024//1024 as dbsize_gb,trunc(usage_percent,2) "use_per(%)" from license_audits where audited_data= 'Total' order by audit_start_timestamp desc limit 1;
-----------------------------echo >>> Compressed database size 
select sysdate ds, trunc(SUM(ps.wos_used_bytes+ps.ros_used_bytes)/1024/1024/1024::float) AS total_size_gb
from projection_storage ps
WHERE (ps.wos_used_bytes + ps.ros_used_bytes) > 0 group by 1;
-----------------------------echo >>> Table partition counts 
select sysdate ds, table_schema, projection_name, count(distinct partition_key) partition_cnt, avg(ROS_ROW_COUNT) avg_rows
from partitions
group by 1,2,3 having count(distinct partition_key)>900 order by 4 desc,2 limit 10;
-----------------------------echo >>> Dimension or small tables with unreasonable partitioning 
select distinct a.table_schema||'.'||t.table_name as table_name, a.is_segmented,
       substr(t.partition_expression,instr(t.partition_expression,'.')+1) partition_exp,
       a.partition_cnt, a.rows_cnt, a.avg_partition_rows_cnt
from (select pt.table_schema, pt.projection_name, pj.anchor_table_id, pj.is_segmented,
             count(distinct pt.partition_key) partition_cnt,
             case when pj.is_segmented then sum(pt.ros_row_count) when not pj.is_segmented then sum(pt.ros_row_count)//count(distinct pt.node_name) end rows_cnt,
             (case when pj.is_segmented then sum(pt.ros_row_count) when not pj.is_segmented then sum(pt.ros_row_count)//count(distinct pt.node_name) end)//count(distinct pt.partition_key) avg_partition_rows_cnt
      from partitions pt join projections pj using(projection_id) group by 1,2,3,4) a
join tables t on t.table_id = a.anchor_table_id
where a.rows_cnt < 10000000
order by a.avg_partition_rows_cnt, a.rows_cnt desc, partition_cnt desc limit 10;
-----------------------------echo >>> Fact tables without partitioning 
select distinct a.table_schema||'.'||a.table_name as table_name, a.is_segmented, a.rows_cnt
from (select t.table_schema, t.table_name, pj.projection_name, pj.is_segmented,
             case when pj.is_segmented then sum(pt.ros_row_count) when not pj.is_segmented then sum(pt.ros_row_count)//count(distinct pt.node_name) end rows_cnt
      from projections pj
      join tables t on pj.anchor_table_id = t.table_id
      join projection_storage pt using (projection_id) group by 1,2,3,4) a
where a.rows_cnt >= 1000000
and not exists(select 'x' from partitions p where p.projection_name = a.projection_name)
order by a.rows_cnt desc limit 10;
-----------------------------echo >>> Dimension or small tables that need not be segmented 
select p.projection_Schema || '.' || p.anchor_Table_name, sum(ps.ros_row_count)
from projections p, projection_storage ps
where p.projection_name = ps.projection_name and ps.ros_row_count < 1000000 and p.is_segmented
group by 1 order by 2 asc limit 10;
-----------------------------echo >>> Fact tables that should be segmented 
select proj, row_count/(proj_count) as table_row_count
from (select p.projection_schema || '.' || p.anchor_table_name as proj,
             sum(ps.ros_row_count) as row_count,
             count(distinct ps.projection_name) as proj_count
      from projections p, projection_storage ps
      where p.projection_name = ps.projection_name and p.projection_schema = ps.projection_schema and not p.is_segmented
      group by 1) pps
where row_count/(proj_count) > 1000000 order by table_row_count desc limit 10;
-----------------------------echo >>> Data distribution skew 
select projection, min_used_bytes//1024^3 min_used_GB, max_used_bytes//1024^3 max_used_GB, round(skew_pct::float,2) skew_pct
from (select distinct trim(ps.projection) projection,
             first_value(used_bytes) over (w order by used_bytes asc) as min_used_bytes,
             first_value(used_bytes) over (w order by used_bytes desc) as max_used_bytes,
             first_value(used_bytes) over (w order by used_bytes asc) / first_value(used_bytes) over (w order by used_bytes desc) as skew_pct
      from (select node_name, projection_id, projection_schema || '.' || projection_name as projection, sum(used_bytes) as used_bytes
            from projection_storage group by 1,2,3) as ps
      join projections p using (projection_id)
      where p.is_segmented and ps.used_bytes > 0
      window w as (partition by ps.projection)) t
where skew_pct < 0.8 order by 4 limit 10;
-----------------------------echo >>> Projection count per table 
select t.table_schema,t.table_name, count(distinct p.projection_name) projection_cnt from tables t join projections p on t.table_id = p.anchor_table_id group by 1,2 having count(distinct p.projection_id)>10 order by 3 desc limit 10;
-----------------------------echo >>> ROS container count per projection per node
select projection_name, node_name, sum(ros_count) as ros_cnt from projection_storage group by projection_name, node_name having sum(ros_count)>900 order by ros_cnt desc; 
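-- If a projection shows an excessive ROS container count, a manual mergeout can be forced via
-- the Tuple Mover; hedged example, substitute a real table from the query above before uncommenting:
-- select do_tm_task('mergeout', 'schema_name.table_name');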
-----------------------------echo >>> Unused projections 
select anchor_table_name from projections where projection_name not in (select projection_name from projection_usage);
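-- An unused projection can then be dropped to reclaim space; hedged example, only safe when
-- it is not the table's sole superprojection:
-- drop projection schema_name.projection_name;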
-----------------------------echo >>> SQL execution-time buckets 
select query_type,case when query_duration_us < 1000000 then 'A. sub-second'
when query_duration_us between 1000000 and 3000000 then 'B. 1-3 seconds'
when query_duration_us between 3000000 and 7000000 then 'C. 3-7 seconds'
when query_duration_us between 7000000 and 15000000 then 'D. 7-15 seconds'
when query_duration_us between 15000000 and 30000000 then 'E. 15-30 seconds'
when query_duration_us between 30000000 and 60000000 then 'F. 30-60 seconds'
when query_duration_us between 60000000 and 180000000 then 'G. 1-3 minutes'
when query_duration_us between 180000000 and 600000000 then 'H. 3-10 minutes'
when query_duration_us between 600000000 and 1800000000 then 'I. 10-30 minutes'
when query_duration_us > 1800000000 then 'J. more than 30 minutes' end, count(*)
from query_profiles group by 1,2 order by 1,2 asc ;

-----------------------------echo >>> Largest tables 
select projection_schema, anchor_table_name, to_char(sum(used_bytes)/1024/1024/1024,'999,999.99') as disk_space_used_gb from projection_storage group by projection_schema, anchor_table_name order by disk_space_used_gb desc limit 10;
-----------------------------echo >>> Top SQL 
select query_duration_us, table_name, user_name, processed_row_count as rows_processed, substr(query, 0,70) from query_profiles order by query_duration_us desc limit 10;
EOF



---------------Average execution time over the past hour------------------
select 
(now() - 1/24) from_date, 
now(),  
query_type,  
count(1) total_exec_sql,   
min(query_duration_us//1000) min_ms,   
max(query_duration_us//1000) max_ms,   
avg(query_duration_us//1000) avg_ms
from query_profiles 
where query_start::timestamp > now() - 1/24 
group by 1,2,3 order by 7 desc ;

------------------------------Loading JSON into Vertica-------------------------------
[/home/dbadmin] cat json.dat
{ "name": "Everest", "type": "mountain", "height": 29029, "hike_safety": 34.1 }
{ "name": "Mt St Helens", "type": "volcano", "hike_safety": 15.4 }

CREATE TABLE mountains(name varchar(64), type varchar(32), height integer);
COPY mountains FROM local '/home/dbadmin/json.dat' WITH PARSER fjsonParser();

-- Compute New Values for the Target Table
-- https://my.vertica.com/docs/8.1.x/HTML/index.htm#Authoring/AdministratorsGuide/BulkLoadCOPY/IgnoringColumnsAndFieldsInTheLoadFile.htm

--------------------------Transform data while loading-------------------------------------------
CREATE TABLE names(first_name VARCHAR(20), last_name VARCHAR(20), full_name VARCHAR(60));
CREATE TABLE
=> COPY names(first_name, middle_name FILLER VARCHAR(20), last_name, full_name AS first_name||' '||middle_name||' '||last_name) FROM STDIN;
Enter data to be copied followed by a newline.
End with a backslash and a period on a line by itself.

--------------------Example----------------------------
copy tb_dw_ct_knba_klg_webpage_day_yzg (statis_date as to_char(to_timestamp(op_time/1000)::date,'yyyymmdd'), data_time, data_ip, data_type, sis_id, prov_code, serial_num, staff_id, call_num, op_time, title, url, refer, page_loadtime, dom_loadtime, white_time, konwledge_channel, knowledge_id, event_value, call_bgntime, call_endtime)
from local '/data/interface/servefc/zhishikubak/test/page_zhishiku_2018-06-13_13079.txt';

select distinct(substr(request,1,100)), request_duration_ms, start_timestamp
from query_requests
where request like '%select%'
order by start_timestamp desc limit 50;

Shell script configuration for Vertica real-time Kafka consumption and loading:

# Vertica 8.1.0
kafka_config=" --config-schema kafka_date_dimension4 --dbhost 192.168.1.1 --username dbadmin --password xxxx"
# vsql client used by the heredocs below (password placeholder, same as above)
VSQL='/opt/vertica/bin/vsql -Udbadmin -wxxxx'

# shutdown instance
/opt/vertica/packages/kafka/bin/vkconfig shutdown --instance-name kafka_date_dimension4 ${kafka_config}
echo "Shutdown Instance Complete!"
# truncate table
$VSQL <<- EOF
drop schema kafka_date_dimension4 cascade;
truncate table csapsmpl.tb_svr_u_cntmng;
EOF

# Create and Configure Scheduler
/opt/vertica/packages/kafka/bin/vkconfig scheduler --create --add ${kafka_config} --frame-duration '00:00:10' --eof-timeout-ms 3000 --operator dbadmin
echo "Create and Configure Scheduler Complete!"

# Create a Cluster
/opt/vertica/packages/kafka/bin/vkconfig cluster --create --cluster kafka_cluster --hosts 192.168.125.199:6667,192.168.125.136:6667,192.168.125.110:6667 ${kafka_config}
echo "Create Cluster Complete!"

# Create a Data Table


# Create a Source
/opt/vertica/packages/kafka/bin/vkconfig source --create --source KAFKA_CSAP_SERVERCORE_USMPL_CNTMNGT_TOPIC  --cluster kafka_cluster --partitions 1 ${kafka_config}
echo "Create Kafka Source Complete!"

# Create a Target
/opt/vertica/packages/kafka/bin/vkconfig target --create --target-schema csapsmpl --target-table tb_svr_u_cntmng ${kafka_config}
echo "Create Target Complete!"

# Create a Load-Spec
/opt/vertica/packages/kafka/bin/vkconfig load-spec --create --load-spec load_date_dimension_spec2 --parser KafkaJSONParser --parser-parameters flatten_arrays=False,flatten_maps=False ${kafka_config}
#/opt/vertica/packages/kafka/bin/vkconfig load-spec --create --load-spec load_date_dimension_spec --parser KafkaJSONParser --filters "FILTER KafkaInsertDelimiters(delimiter=E'\n')" ${kafka_config}

echo "Create Load-Spec Complete!"

# Create a Microbatch
/opt/vertica/packages/kafka/bin/vkconfig microbatch --create --microbatch tb_svr_u_cntmng --target-schema csapsmpl --target-table tb_svr_u_cntmng --rejection-schema csapsmpl --rejection-table tb_svr_u_cntmng_rej --load-spec load_date_dimension_spec2 --add-source KAFKA_CSAP_SERVERCORE_USMPL_CNTMNGT_TOPIC --add-source-cluster kafka_cluster ${kafka_config}
echo "Create Microbatch Complete!"

# Launch the Scheduler
/opt/vertica/packages/kafka/bin/vkconfig launch --instance-name kafka_date_dimension4 ${kafka_config} &
echo "Launch the Scheduler Complete!"
echo "Done!"