Azure Synapse Analytics SQLプール上で開発や運用/保守を行う際によく利用するSQLをまとめてみました。
#リソース関連
Azure Synapse Analytics SQLプールの処理能力(cDWU)の見積方法を考察と検証してみた
Azure Synapse Analyticsでデータをロードする時にファイルは分割した方がいい?分割しなくてもいい?
##cDWUの変更
masterデータベースで実行
ALTER DATABASE "<対象のデータベース名>"
MODIFY (SERVICE_OBJECTIVE = 'DW<100~30000までで指定>c');
##cDWUの変更状態の確認
masterデータベースで実行
SELECT
*
FROM sys.dm_operation_status
WHERE resource_type_desc = 'Database'
AND major_resource_id = '<対象のデータベース名>';
##cDWUの確認
masterデータベースで実行
SELECT
db.name [Database]
,ds.edition [Edition]
,ds.service_objective [Service Objective]
FROM
sys.database_service_objectives AS ds
JOIN
sys.databases AS db ON ds.database_id = db.database_id
WHERE
ds.edition='DataWarehouse';
#ユーザの追加とリソースクラスの変更
Azure Synapse Analytics SQLプールを高速にするリソースクラス
masterデータベースで実行
--masterデータベースへログインユーザの作成
CREATE LOGIN <ユーザ名> WITH PASSWORD = '<パスワード>';
--masterデータベースへユーザの追加
CREATE USER <ユーザ名> for LOGIN <ユーザ名>;
対象のデータベースで実行
--ユーザデータベースへユーザーを追加
CREATE USER <ユーザ名> for LOGIN <ユーザ名>;
--ユーザにCONTROL権限を付与
GRANT CONTROL ON DATABASE::"<ユーザーデータベース名>" to xlargercuser;
--対象のユーザへリソースクラスを付与
EXEC sp_addrolemember '<リソースクラス>', '<ユーザ名>';
#ワークロードグループ/分類子
Azure Synapse Analytics SQLプールの実行クエリの重みづけ(重要度)
Azure Synapse Analytics SQLプールにおけるクエリ実行時のリソース管理
##ワークロードグループの確認
SELECT * FROM sys.workload_management_workload_groups ORDER BY 1;
##ワークロード分類子の確認
SELECT *
FROM sys.workload_management_workload_classifiers
ORDER BY 1;
##ワークロードグループの実行統計
SELECT * FROM sys.dm_workload_management_workload_groups_stats ORDER BY 1;
##実行されたクエリの重度の確認
SELECT s.login_name, r.status, r.importance, r.submit_time, r.start_time
FROM sys.dm_pdw_exec_sessions s
JOIN sys.dm_pdw_exec_requests r ON s.session_id = r.session_id
WHERE r.resource_class is not null
ORDER BY r.start_time;
#リザルトキャッシュ
Azure Synapse AnalyticsのResult-set Caching機能を試してみた
##リザルトキャッシュの確認
--1 = 結果セットのキャッシュが有効
--0 = 結果セットのキャッシュが無効
SELECT is_result_set_caching_on
FROM sys.databases
WHERE name = '<データベース名>';
##リザルトキャッシュの有効化
ALTER DATABASE <データベース名>
SET RESULT_SET_CACHING ON;
#統計情報
Azure Synapse Analytics SQLプールでの統計情報の管理
##統計の自動機能の確認
--1は機能がON、0は機能がOFF
select
name
,is_auto_create_stats_on
,is_auto_update_stats_on
from sys.databases order by name;
##統計の自動取得機能のON/OFF
--統計の自動取得機能のON
ALTER DATABASE <データベース名>
SET AUTO_CREATE_STATISTICS ON;
--統計の自動取得機能のOFF
ALTER DATABASE <データベース名>
SET AUTO_CREATE_STATISTICS OFF;
##統計情報確認用のVIEWの作成
IF OBJECT_ID('vstats_columns') IS NOT NULL
DROP view vstats_columns
GO
CREATE VIEW dbo.vstats_columns
AS
SELECT
sm.[name] AS [schema_name]
, tb.[name] AS [table_name]
, st.[name] AS [stats_name]
, st.[filter_definition] AS [stats_filter_definition]
, st.[has_filter] AS [stats_is_filtered]
, STATS_DATE(st.[object_id],st.[stats_id])
AS [stats_last_updated_date]
, co.[name] AS [stats_column_name]
, ty.[name] AS [column_type]
, co.[max_length] AS [column_max_length]
, co.[precision] AS [column_precision]
, co.[scale] AS [column_scale]
, co.[is_nullable] AS [column_is_nullable]
, co.[collation_name] AS [column_collation_name]
, QUOTENAME(sm.[name])+'.'+QUOTENAME(tb.[name])
AS two_part_name
, QUOTENAME(DB_NAME())+'.'+QUOTENAME(sm.[name])+'.'+QUOTENAME(tb.[name])
AS three_part_name
FROM sys.objects AS ob
JOIN sys.stats AS st ON ob.[object_id] = st.[object_id]
JOIN sys.stats_columns AS sc ON st.[stats_id] = sc.[stats_id]
AND st.[object_id] = sc.[object_id]
JOIN sys.columns AS co ON sc.[column_id] = co.[column_id]
AND sc.[object_id] = co.[object_id]
JOIN sys.types AS ty ON co.[user_type_id] = ty.[user_type_id]
JOIN sys.tables AS tb ON co.[object_id] = tb.[object_id]
JOIN sys.schemas AS sm ON tb.[schema_id] = sm.[schema_id]
WHERE 1=1
GO
##取得されている列統計の確認(ビュー:vstats_columns
の作成必須)
select * from vstats_columns
where table_name = '<テーブル名>' and stats_last_updated_date is not null;
##列統計の取得
--列統計の取得(デフォルト20%※10億件をこえると2%)
CREATE STATISTICS <列統計名> ON <テーブル名>(<列名>);
--列統計の取得(サンプルサイズの指定)
CREATE STATISTICS <列統計名> ON <テーブル名>(<列名>) WITH SAMPLE <パーセント指定> PERCENT;
--列統計の取得(全行)
CREATE STATISTICS <列統計名> ON <テーブル名>(<列名>) WITH FULLSCAN;
##列統計の更新
--列統計の更新(テーブル内で取得されている列統計すべて)
UPDATE STATISTICS <テーブル名>;
--列統計の更新(列統計単体)
UPDATE STATISTICS dbo.lineitem(<列統計名>);
#テーブル/INDEX/制約
注意が必要な一意キー、主キーの利用(Azure Synapse Analytics SQLプール)
##テーブルのリネーム
rename object <元のテーブル名> to <新しいテーブル名>;
##一意キーの作成
ALTER TABLE <テーブル名> ADD CONSTRAINT <制約名> UNIQUE(<カラム名>[,<カラム名>・・・]) NOT ENFORCED;
##主キーの作成
ALTER TABLE <テーブル名> ADD CONSTRAINT <制約名> PRIMARY KEY NONCLUSTERED (<カラム名>[,<カラム名>・・・]) NOT ENFORCED;
##制約の削除
ALTER TABLE <テーブル名> DROP CONSTRAINT <制約名>;
#パーティション関連
Azure Synapse Analytics SQLプールのパーティションについて
Azure Synapse Analytics SQLプールのパーティションメンテナンス方法
##パーティション毎の件数確認
SELECT QUOTENAME(s.[name])+'.'+QUOTENAME(t.[name]) as Table_name
, i.[name] as Index_name
, p.partition_number as Partition_nmbr
, p.[rows] as Row_count
, p.[data_compression_desc] as Data_Compression_desc
FROM sys.partitions p
JOIN sys.tables t ON p.[object_id] = t.[object_id]
JOIN sys.schemas s ON t.[schema_id] = s.[schema_id]
JOIN sys.indexes i ON p.[object_id] = i.[object_Id]
AND p.[index_Id] = i.[index_Id]
WHERE t.[name] = '<テーブル名>'
;
##ディストリビューション/パーティション毎の件数の確認
DBCC PDW_SHOWPARTITIONSTATS('<スキーマ名>.<パーティション名>');
##パーティションの情報の確認
SELECT
t.name AS TableName,
i.name AS IndexName,
p.partition_number,
p.partition_id,
f.type_desc,
c.name AS PartitioningColumnName,
r.value
FROM sys.tables AS t
JOIN sys.indexes AS i
ON t.object_id = i.object_id
JOIN sys.partitions AS p
ON i.object_id = p.object_id AND i.index_id = p.index_id
JOIN sys.partition_schemes AS s
ON i.data_space_id = s.data_space_id
JOIN sys.partition_functions AS f
ON s.function_id = f.function_id
LEFT JOIN sys.partition_range_values AS r
ON f.function_id = r.function_id and r.boundary_id = p.partition_number
JOIN sys.index_columns AS ic
ON ic.object_id = i.object_id
AND ic.index_id = i.index_id AND ic.partition_ordinal > 0
JOIN sys.columns AS c
ON t.object_id = c.object_id
WHERE t.name = '<テーブル名>'
AND c.column_id = ic.column_id
ORDER BY p.partition_number;
#マテリアライズドビュー
Azure Synapse Analyticsのマテリアライズドビュー使ってみた
##マテリアライズドビューの確認
SELECT V.name as materialized_view, V.object_id
FROM sys.views V
JOIN sys.indexes I ON V.object_id= I.object_id AND I.index_id < 2;
##クエリに対する推奨マテリアライズドビューの表示
EXPLAIN WITH_RECOMMENDATIONS <対象のクエリ>;
#テーブルの容量/テーブル内のデータの偏り
データの偏りに注意が必要(Azure Synapse Analytics SQLプール)
CLUSTERED COLUMNSTORE INDEXが圧縮されるまで検証してみた(Azure Synapse Analytics SQLプール)
##データの偏りを確認
DBCC PDW_SHOWSPACEUSED('LINEITEM_TEST');
##テーブルの容量/データの偏りなどを確認するためのVIEWの作成(ビュー:vTableSizes
)
CREATE VIEW dbo.vTableSizes
AS
WITH base
AS
(
SELECT
GETDATE() AS [execution_time]
, DB_NAME() AS [database_name]
, s.name AS [schema_name]
, t.name AS [table_name]
, QUOTENAME(s.name)+'.'+QUOTENAME(t.name) AS [two_part_name]
, nt.[name] AS [node_table_name]
, ROW_NUMBER() OVER(PARTITION BY nt.[name] ORDER BY (SELECT NULL)) AS [node_table_name_seq]
, tp.[distribution_policy_desc] AS [distribution_policy_name]
, c.[name] AS [distribution_column]
, nt.[distribution_id] AS [distribution_id]
, i.[type] AS [index_type]
, i.[type_desc] AS [index_type_desc]
, nt.[pdw_node_id] AS [pdw_node_id]
, pn.[type] AS [pdw_node_type]
, pn.[name] AS [pdw_node_name]
, di.name AS [dist_name]
, di.position AS [dist_position]
, nps.[partition_number] AS [partition_nmbr]
, nps.[reserved_page_count] AS [reserved_space_page_count]
, nps.[reserved_page_count] - nps.[used_page_count] AS [unused_space_page_count]
, nps.[in_row_data_page_count]
+ nps.[row_overflow_used_page_count]
+ nps.[lob_used_page_count] AS [data_space_page_count]
, nps.[reserved_page_count]
- (nps.[reserved_page_count] - nps.[used_page_count])
- ([in_row_data_page_count]
+ [row_overflow_used_page_count]+[lob_used_page_count]) AS [index_space_page_count]
, nps.[row_count] AS [row_count]
from
sys.schemas s
INNER JOIN sys.tables t
ON s.[schema_id] = t.[schema_id]
INNER JOIN sys.indexes i
ON t.[object_id] = i.[object_id]
AND i.[index_id] <= 1
INNER JOIN sys.pdw_table_distribution_properties tp
ON t.[object_id] = tp.[object_id]
INNER JOIN sys.pdw_table_mappings tm
ON t.[object_id] = tm.[object_id]
INNER JOIN sys.pdw_nodes_tables nt
ON tm.[physical_name] = nt.[name]
INNER JOIN sys.dm_pdw_nodes pn
ON nt.[pdw_node_id] = pn.[pdw_node_id]
INNER JOIN sys.pdw_distributions di
ON nt.[distribution_id] = di.[distribution_id]
INNER JOIN sys.dm_pdw_nodes_db_partition_stats nps
ON nt.[object_id] = nps.[object_id]
AND nt.[pdw_node_id] = nps.[pdw_node_id]
AND nt.[distribution_id] = nps.[distribution_id]
LEFT OUTER JOIN (select * from sys.pdw_column_distribution_properties where distribution_ordinal = 1) cdp
ON t.[object_id] = cdp.[object_id]
LEFT OUTER JOIN sys.columns c
ON cdp.[object_id] = c.[object_id]
AND cdp.[column_id] = c.[column_id]
WHERE pn.[type] = 'COMPUTE'
)
, size
AS
(
SELECT
[execution_time]
, [database_name]
, [schema_name]
, [table_name]
, [two_part_name]
, [node_table_name]
, [node_table_name_seq]
, [distribution_policy_name]
, [distribution_column]
, [distribution_id]
, [index_type]
, [index_type_desc]
, [pdw_node_id]
, [pdw_node_type]
, [pdw_node_name]
, [dist_name]
, [dist_position]
, [partition_nmbr]
, [reserved_space_page_count]
, [unused_space_page_count]
, [data_space_page_count]
, [index_space_page_count]
, [row_count]
, ([reserved_space_page_count] * 8.0) AS [reserved_space_KB]
, ([reserved_space_page_count] * 8.0)/1000 AS [reserved_space_MB]
, ([reserved_space_page_count] * 8.0)/1000000 AS [reserved_space_GB]
, ([reserved_space_page_count] * 8.0)/1000000000 AS [reserved_space_TB]
, ([unused_space_page_count] * 8.0) AS [unused_space_KB]
, ([unused_space_page_count] * 8.0)/1000 AS [unused_space_MB]
, ([unused_space_page_count] * 8.0)/1000000 AS [unused_space_GB]
, ([unused_space_page_count] * 8.0)/1000000000 AS [unused_space_TB]
, ([data_space_page_count] * 8.0) AS [data_space_KB]
, ([data_space_page_count] * 8.0)/1000 AS [data_space_MB]
, ([data_space_page_count] * 8.0)/1000000 AS [data_space_GB]
, ([data_space_page_count] * 8.0)/1000000000 AS [data_space_TB]
, ([index_space_page_count] * 8.0) AS [index_space_KB]
, ([index_space_page_count] * 8.0)/1000 AS [index_space_MB]
, ([index_space_page_count] * 8.0)/1000000 AS [index_space_GB]
, ([index_space_page_count] * 8.0)/1000000000 AS [index_space_TB]
FROM base
)
SELECT *
FROM size
;
##テーブルの件数とサイズを確認(ビュー:vTableSizes
の作成必須)
SELECT
database_name
, schema_name
, table_name
, distribution_policy_name
, distribution_column
, index_type_desc
, COUNT(distinct partition_nmbr) as nbr_partitions
, SUM(row_count) as table_row_count
, SUM(reserved_space_GB) as table_reserved_space_GB
, SUM(data_space_GB) as table_data_space_GB
, SUM(index_space_GB) as table_index_space_GB
, SUM(unused_space_GB) as table_unused_space_GB
FROM
dbo.vTableSizes
GROUP BY
database_name
, schema_name
, table_name
, distribution_policy_name
, distribution_column
, index_type_desc
ORDER BY
table_reserved_space_GB desc
;
##10%以上のデータの偏りが発生している対象の特定(ビュー:vTableSizes
の作成必須)
--DATA_SKEW列が0.1以上のものが対象
select
two_part_name
,partition_nmbr
,max(row_count * 1.000) AS MAX_ROWS
,min(row_count * 1.000) AS NIN_ROWS
,ROUND((max(row_count * 1.000) - min(row_count * 1.000))/max((row_count+1) * 1.000),4) AS DATA_SKEW
from dbo.vTableSizes
group by
two_part_name
,partition_nmbr
order by
DATA_SKEW desc
;
##分散タイプ毎の容量の確認(ビュー:vTableSizes
の作成必須)
SELECT
distribution_policy_name
, SUM(row_count) as table_type_row_count
, SUM(reserved_space_GB) as table_type_reserved_space_GB
, SUM(data_space_GB) as table_type_data_space_GB
, SUM(index_space_GB) as table_type_index_space_GB
, SUM(unused_space_GB) as table_type_unused_space_GB
FROM dbo.vTableSizes
GROUP BY distribution_policy_name
;
##インデックスタイプごとの容量の確認(ビュー:vTableSizes
の作成必須)
SELECT
index_type_desc
, SUM(row_count) as table_type_row_count
, SUM(reserved_space_GB) as table_type_reserved_space_GB
, SUM(data_space_GB) as table_type_data_space_GB
, SUM(index_space_GB) as table_type_index_space_GB
, SUM(unused_space_GB) as table_type_unused_space_GB
FROM dbo.vTableSizes
GROUP BY index_type_desc
;
##クラスター化列ストアインデックスの行グループごとの平均行数を計算するビュー(ビュー:vColumnstoreDensity
)
CREATE VIEW dbo.vColumnstoreDensity
AS
SELECT
GETDATE() AS [execution_date]
, DB_Name() AS [database_name]
, s.name AS [schema_name]
, t.name AS [table_name]
, COUNT(DISTINCT rg.[partition_number]) AS [table_partition_count]
, SUM(rg.[total_rows]) AS [row_count_total]
, SUM(rg.[total_rows])/COUNT(DISTINCT rg.[distribution_id]) AS [row_count_per_distribution_MAX]
, CEILING ((SUM(rg.[total_rows])*1.0/COUNT(DISTINCT rg.[distribution_id]))/1048576) AS [rowgroup_per_distribution_MAX]
, SUM(CASE WHEN rg.[State] = 0 THEN 1 ELSE 0 END) AS [INVISIBLE_rowgroup_count]
, SUM(CASE WHEN rg.[State] = 0 THEN rg.[total_rows] ELSE 0 END) AS [INVISIBLE_rowgroup_rows]
, MIN(CASE WHEN rg.[State] = 0 THEN rg.[total_rows] ELSE NULL END) AS [INVISIBLE_rowgroup_rows_MIN]
, MAX(CASE WHEN rg.[State] = 0 THEN rg.[total_rows] ELSE NULL END) AS [INVISIBLE_rowgroup_rows_MAX]
, AVG(CASE WHEN rg.[State] = 0 THEN rg.[total_rows] ELSE NULL END) AS [INVISIBLE_rowgroup_rows_AVG]
, SUM(CASE WHEN rg.[State] = 1 THEN 1 ELSE 0 END) AS [OPEN_rowgroup_count]
, SUM(CASE WHEN rg.[State] = 1 THEN rg.[total_rows] ELSE 0 END) AS [OPEN_rowgroup_rows]
, MIN(CASE WHEN rg.[State] = 1 THEN rg.[total_rows] ELSE NULL END) AS [OPEN_rowgroup_rows_MIN]
, MAX(CASE WHEN rg.[State] = 1 THEN rg.[total_rows] ELSE NULL END) AS [OPEN_rowgroup_rows_MAX]
, AVG(CASE WHEN rg.[State] = 1 THEN rg.[total_rows] ELSE NULL END) AS [OPEN_rowgroup_rows_AVG]
, SUM(CASE WHEN rg.[State] = 2 THEN 1 ELSE 0 END) AS [CLOSED_rowgroup_count]
, SUM(CASE WHEN rg.[State] = 2 THEN rg.[total_rows] ELSE 0 END) AS [CLOSED_rowgroup_rows]
, MIN(CASE WHEN rg.[State] = 2 THEN rg.[total_rows] ELSE NULL END) AS [CLOSED_rowgroup_rows_MIN]
, MAX(CASE WHEN rg.[State] = 2 THEN rg.[total_rows] ELSE NULL END) AS [CLOSED_rowgroup_rows_MAX]
, AVG(CASE WHEN rg.[State] = 2 THEN rg.[total_rows] ELSE NULL END) AS [CLOSED_rowgroup_rows_AVG]
, SUM(CASE WHEN rg.[State] = 3 THEN 1 ELSE 0 END) AS [COMPRESSED_rowgroup_count]
, SUM(CASE WHEN rg.[State] = 3 THEN rg.[total_rows] ELSE 0 END) AS [COMPRESSED_rowgroup_rows]
, SUM(CASE WHEN rg.[State] = 3 THEN rg.[deleted_rows] ELSE 0 END) AS [COMPRESSED_rowgroup_rows_DELETED]
, MIN(CASE WHEN rg.[State] = 3 THEN rg.[total_rows] ELSE NULL END) AS [COMPRESSED_rowgroup_rows_MIN]
, MAX(CASE WHEN rg.[State] = 3 THEN rg.[total_rows] ELSE NULL END) AS [COMPRESSED_rowgroup_rows_MAX]
, AVG(CASE WHEN rg.[State] = 3 THEN rg.[total_rows] ELSE NULL END) AS [COMPRESSED_rowgroup_rows_AVG]
, 'ALTER INDEX ALL ON ' + s.name + '.' + t.NAME + ' REBUILD;' AS [Rebuild_Index_SQL]
FROM sys.[pdw_nodes_column_store_row_groups] rg
JOIN sys.[pdw_nodes_tables] nt ON rg.[object_id] = nt.[object_id]
AND rg.[pdw_node_id] = nt.[pdw_node_id]
AND rg.[distribution_id] = nt.[distribution_id]
JOIN sys.[pdw_table_mappings] mp ON nt.[name] = mp.[physical_name]
JOIN sys.[tables] t ON mp.[object_id] = t.[object_id]
JOIN sys.[schemas] s ON t.[schema_id] = s.[schema_id]
GROUP BY
s.[name]
, t.[name];
##クラスター化列ストアインデックスの列ストア、行ストアの情報取得(ビュー:vColumnstoreDensity
の作成必須)
select
table_name
,table_partition_count
,row_count_total
,row_count_per_distribution_MAX
,COMPRESSED_rowgroup_rows
,COMPRESSED_rowgroup_count
,COMPRESSED_rowgroup_rows_AVG
,COMPRESSED_rowgroup_rows_MIN
,COMPRESSED_rowgroup_rows_MAX
,COMPRESSED_rowgroup_rows_DELETED
,OPEN_rowgroup_rows
,OPEN_rowgroup_count
,OPEN_rowgroup_rows_AVG
,OPEN_rowgroup_rows_MIN
,OPEN_rowgroup_rows_MAX
,CLOSED_rowgroup_rows
,CLOSED_rowgroup_count
,Rebuild_Index_SQL
from
vColumnstoreDensity;
#SQL監査
SQLの監査を取得する(Azure Synapse Analytics SQLプール)
##SQL監査ログの確認
select
*
from
sys.fn_get_audit_file('https://<ストレージアカウント名>.blob.core.windows.net/sqldbauditlogs/<サーバー名>/<データベース名>/SqlDbAuditing_Audit',default,default);
##監査分類の確認
SELECT
SCHEMA_NAME(sys.all_objects.schema_id) as SchemaName,
sys.all_objects.name AS [TableName], sys.all_columns.name As [ColumnName],
[Label], [Label_ID], [Information_Type], [Information_Type_ID], [Rank], [Rank_Desc]
FROM
sys.sensitivity_classifications
left join sys.all_objects on sys.sensitivity_classifications.major_id = sys.all_objects.object_id
left join sys.all_columns on sys.sensitivity_classifications.major_id = sys.all_columns.object_id and sys.sensitivity_classifications.minor_id = sys.all_columns.column_id;
#動的データマスキング
SQL Serverの動的データマスキングを使ってみた
##動的データマスキングの確認
SELECT c.name, tbl.name as table_name, c.is_masked, c.masking_function
FROM sys.masked_columns AS c
JOIN sys.tables AS tbl
ON c.[object_id] = tbl.[object_id]
WHERE is_masked = 1;
##動的データマスキングの設定/変更
--動的データマスキングの設定/変更(default)
ALTER TABLE <テーブル名>
ALTER COLUMN <カラム名> ADD MASKED WITH (FUNCTION = 'default()');
--動的データマスキングの設定/変更(Email)
ALTER TABLE <テーブル名>
ALTER COLUMN <カラム名> ADD MASKED WITH (FUNCTION = 'email()');
--動的データマスキングの設定/変更(ランダム)
ALTER TABLE <テーブル名>
ALTER COLUMN <カラム名> ADD MASKED WITH (FUNCTION = 'random(<レンジの最初の数字>, <レンジの最後の数字>)');
--動的データマスキングの設定/変更(カスタム)
ALTER TABLE <テーブル名>
ALTER COLUMN <カラム名> ADD MASKED WITH (FUNCTION = 'partial(<公開する最初の文字数>, "<穴埋め文字>", <公開する最後の文字数>)');
#遅いSQLに関する情報収集
##アクティブな接続を確認する
SELECT * FROM sys.dm_pdw_exec_sessions where status <> 'Closed' and session_id <> session_id();
##実行中のクエリを確認する
SELECT *
FROM sys.dm_pdw_exec_requests
WHERE status not in ('Completed','Failed','Cancelled')
AND session_id <> session_id()
ORDER BY submit_time DESC;
##実行時間の長いTOP10のクエリを確認する
SELECT TOP 10 *
FROM sys.dm_pdw_exec_requests
ORDER BY total_elapsed_time DESC;
##クエリの実行計画を確認する
SELECT * FROM sys.dm_pdw_request_steps
WHERE request_id = '<QIDから始まるrequest_id>'
ORDER BY step_index;
##クエリの中の詳細情報を取得する
SELECT * FROM sys.dm_pdw_sql_requests
WHERE request_id = '<QIDから始まるrequest_id>' AND STEP_INDEX=<対象のstep_indexの番号>;
##dmsの状況の確認
SELECT * FROM sys.dm_pdw_dms_workers
WHERE request_id = '<QIDから始まるrequest_id>';
#COPYによるロード
COPY機能でSynapse SQLプールにデータを投入してみた
##COPYによるデータロード(CSV)
COPY INTO <テーブル名> --ロードするテーブル名を指定
FROM 'https://<ストレージアカウント名>.blob.core.windows.net/<コンテナ名>/<フォルダ名>/*' --ロードするファイル指定
WITH (
FILE_TYPE = 'CSV' --ファイルフォーマット指定
,CREDENTIAL = (IDENTITY= 'Storage Account Key', SECRET='<ストレージアカウントキー>') --ストレージアカウントキーで認証
,FIELDTERMINATOR='|' --デリミタ(|)
,ROWTERMINATOR = '0x0A' --改行コード(LF)
);
#順序指定クラスター化列ストアインデックス
順序指定クラスター化列ストアインデックス(Azure Synapse Analytics SQLプール)
##順序指定クラスター化インデックスの情報を確認
SELECT
object_name(c.object_id) table_name
, c.name column_name
, i.column_store_order_ordinal
FROM
sys.index_columns i
JOIN sys.columns c ON i.object_id = c.object_id AND c.column_id = i.column_id
WHERE
column_store_order_ordinal <>0;
##列におけるセグメントの範囲を確認する
SELECT
o.name
, pnp.index_id
, cls.row_count
, pnp.data_compression_desc
, pnp.pdw_node_id
, pnp.distribution_id
, cls.segment_id
, cls.column_id
, cls.min_data_id
, cls.max_data_id,
cls.max_data_id-cls.min_data_id as difference
FROM sys.pdw_nodes_partitions AS pnp
JOIN sys.pdw_nodes_tables AS Ntables ON pnp.object_id = NTables.object_id AND pnp.pdw_node_id = NTables.pdw_node_id
JOIN sys.pdw_table_mappings AS Tmap ON NTables.name = TMap.physical_name AND substring(TMap.physical_name,40, 10) = pnp.distribution_id
JOIN sys.objects AS o ON TMap.object_id = o.object_id
JOIN sys.pdw_nodes_column_store_segments AS cls ON pnp.partition_id = cls.partition_id AND pnp.distribution_id = cls.distribution_id
JOIN sys.columns as cols ON o.object_id = cols.object_id AND cls.column_id = cols.column_id
WHERE
o.name = '<テーブル名>'
and cols.name = '<カラム名>'
and TMap.physical_name not like '%HdTable%'
ORDER BY o.name, pnp.distribution_id, cls.min_data_id;