はじめに
Tableauワークブックの調査において、ワークブックが参照しているテーブルの一覧が必要になったため、VBAマクロで抽出しようと思いました。
コード
Option Explicit
Sub ExtractRedshiftTablesFromTWB()
Dim fso As Object
Dim inputFolder As String
Dim csvFile As String
Dim file As Object
Dim xmlDoc As Object
Dim results As Collection
Dim dict As Object
' ファイルシステムオブジェクトの作成
Set fso = CreateObject("Scripting.FileSystemObject")
Set dict = CreateObject("Scripting.Dictionary")
Set results = New Collection
' フォルダパスの設定
' メインシートのC2セルからinputフォルダパスを取得
Dim mainSheet As Worksheet
On Error Resume Next
Set mainSheet = ThisWorkbook.Worksheets("メイン")
If mainSheet Is Nothing Then
Set mainSheet = ThisWorkbook.Worksheets("Main")
End If
If mainSheet Is Nothing Then
Set mainSheet = ThisWorkbook.Worksheets(1) ' 最初のシートを使用
End If
On Error GoTo 0
inputFolder = mainSheet.Range("C2").Value
' inputフォルダパスが空の場合はデフォルトを使用
If inputFolder = "" Then
inputFolder = ThisWorkbook.Path & "\input"
End If
csvFile = ThisWorkbook.Path & "\redshift_tables.csv"
' inputフォルダの存在確認
If Not fso.FolderExists(inputFolder) Then
MsgBox "inputフォルダが見つかりません: " & inputFolder & vbCrLf & _
"メインシートのC2セルに正しいパスを入力してください。", vbExclamation
Exit Sub
End If
' inputフォルダ内のすべての.twbファイルを処理
For Each file In fso.GetFolder(inputFolder).Files
If LCase(fso.GetExtensionName(file.Path)) = "twb" Then
ProcessTWBFile file.Path, dict
End If
Next file
' 結果をExcelシートに出力
WriteResultsToSheet dict
' ExcelシートからCSVに出力
ExportSheetToCSV csvFile
MsgBox "処理が完了しました。" & vbCrLf & _
"CSVファイル: " & csvFile & vbCrLf & _
"Excelシート: 'Redshift_Tables'", vbInformation
End Sub
Sub ProcessTWBFile(filePath As String, dict As Object)
Dim xmlDoc As Object
Dim datasources As Object
Dim datasource As Object
Dim connections As Object
Dim relations As Object
Dim node As Object
Dim connectionInfo As String
Dim tableInfo As String
Dim schema As String
Dim tableName As String
Dim fileName As String
Dim datasourceName As String
Dim datasourceCaption As String
Dim fso As Object
Set fso = CreateObject("Scripting.FileSystemObject")
fileName = fso.GetFileName(filePath)
' XMLドキュメントの作成と読み込み
Set xmlDoc = CreateObject("MSXML2.DOMDocument.6.0")
xmlDoc.async = False
xmlDoc.Load filePath
If xmlDoc.parseError.ErrorCode <> 0 Then
Debug.Print "XMLパースエラー in " & fileName & ": " & xmlDoc.parseError.reason
Exit Sub
End If
' データソース要素を処理
Set datasources = xmlDoc.SelectNodes("//datasource[@hasconnection='false' or not(@hasconnection)]")
For Each datasource In datasources
' データソース名を取得
On Error Resume Next
datasourceName = ""
datasourceCaption = ""
datasourceName = datasource.getAttribute("name")
datasourceCaption = datasource.getAttribute("caption")
On Error GoTo 0
' captionが取得できない場合はnameを使用
If datasourceCaption = "" Or datasourceCaption = vbNullString Then
datasourceCaption = datasourceName
End If
' Parametersデータソースはスキップ
If datasourceName <> "Parameters" Then
' このデータソース内の接続情報を取得
Set connections = datasource.SelectNodes(".//named-connection[@class='redshift']")
For Each node In connections
connectionInfo = GetConnectionInfo(node)
If Not dict.Exists("CONNECTION_" & fileName & "_" & datasourceName) Then
dict.Add "CONNECTION_" & fileName & "_" & datasourceName, connectionInfo
End If
Next node
' このデータソース内のテーブル情報を取得(type='table')
Set relations = datasource.SelectNodes(".//relation[@type='table']")
For Each node In relations
tableInfo = node.getAttribute("table")
If Not IsNull(tableInfo) And tableInfo <> "" Then
ParseTableInfo tableInfo, schema, tableName
AddToDictWithDatasource dict, fileName, datasourceCaption, schema, tableName, "Table"
End If
Next node
' このデータソース内のカスタムSQLクエリを解析(type='text')
Set relations = datasource.SelectNodes(".//relation[@type='text']")
For Each node In relations
Dim sqlText As String
sqlText = node.Text
If sqlText <> "" Then
ExtractTablesFromSQLWithDatasource sqlText, dict, fileName, datasourceCaption
End If
Next node
End If
Next datasource
End Sub
Function GetConnectionInfo(node As Object) As String
Dim dbname As String
Dim server As String
Dim schema As String
Dim username As String
On Error Resume Next
dbname = node.getAttribute("dbname")
server = node.getAttribute("server")
schema = node.getAttribute("schema")
username = node.getAttribute("username")
On Error GoTo 0
GetConnectionInfo = "Server: " & server & ", Database: " & dbname & _
", Schema: " & schema & ", User: " & username
End Function
Sub ParseTableInfo(tableInfo As String, ByRef schema As String, ByRef tableName As String)
' [schema].[table] 形式をパース
Dim parts() As String
' 角括弧を削除
tableInfo = Replace(tableInfo, "[", "")
tableInfo = Replace(tableInfo, "]", "")
' ピリオドで分割
parts = Split(tableInfo, ".")
If UBound(parts) >= 1 Then
schema = parts(0)
tableName = parts(1)
Else
schema = "unknown"
tableName = tableInfo
End If
End Sub
Sub ExtractTablesFromSQL(sqlText As String, dict As Object, fileName As String)
' 旧バージョンとの互換性のため残す
ExtractTablesFromSQLWithDatasource sqlText, dict, fileName, ""
End Sub
Sub ExtractTablesFromSQLWithDatasource(sqlText As String, dict As Object, fileName As String, datasourceName As String)
Dim regex As Object
Dim matches As Object
Dim match As Object
Dim schema As String
Dim tableName As String
Dim fullTableName As String
Set regex = CreateObject("VBScript.RegExp")
' FROM句とJOIN句のテーブルを検索するパターン
' "schema"."table" または [schema].[table] 形式
regex.Pattern = "(?:FROM|JOIN)\s+""?([^"".\s]+)""?\.""?([^"".\s]+)""?"
regex.Global = True
regex.IgnoreCase = True
Set matches = regex.Execute(sqlText)
For Each match In matches
If match.SubMatches.Count >= 2 Then
schema = match.SubMatches(0)
tableName = match.SubMatches(1)
AddToDictWithDatasource dict, fileName, datasourceName, schema, tableName, "SQL"
End If
Next match
End Sub
Sub AddToDict(dict As Object, fileName As String, schema As String, tableName As String, source As String)
' 旧バージョンとの互換性のため残す
AddToDictWithDatasource dict, fileName, "", schema, tableName, source
End Sub
Sub AddToDictWithDatasource(dict As Object, fileName As String, datasourceName As String, schema As String, tableName As String, source As String)
Dim key As String
' スキーマ名が"Extract"の場合は除外
If LCase(schema) = "extract" Then
Exit Sub
End If
key = schema & "." & tableName & "|" & fileName & "|" & datasourceName & "|" & source
If Not dict.Exists(key) Then
dict.Add key, Array(fileName, datasourceName, schema, tableName, source)
End If
End Sub
Sub ExportSheetToCSV(csvFilePath As String)
Dim ws As Worksheet
Dim fso As Object
Dim csvFile As Object
Dim row As Long
Dim col As Long
Dim lastRow As Long
Dim lastCol As Long
Dim lineText As String
' Redshift_Tablesシートを取得
On Error Resume Next
Set ws = ThisWorkbook.Worksheets("Redshift_Tables")
On Error GoTo 0
If ws Is Nothing Then
MsgBox "Redshift_Tablesシートが見つかりません。", vbExclamation
Exit Sub
End If
' ファイルシステムオブジェクトの作成
Set fso = CreateObject("Scripting.FileSystemObject")
Set csvFile = fso.CreateTextFile(csvFilePath, True, False)
' 最終行と最終列を取得
lastRow = ws.Cells(ws.Rows.Count, 1).End(xlUp).row
lastCol = 6 ' F列まで
' データをCSVに書き込み
For row = 1 To lastRow
lineText = ""
For col = 1 To lastCol
' セルの値を取得(カンマを含む場合は引用符で囲む)
Dim cellValue As String
cellValue = CStr(ws.Cells(row, col).Value)
' カンマ、改行、引用符を含む場合は引用符で囲む
If InStr(cellValue, ",") > 0 Or InStr(cellValue, vbLf) > 0 Or InStr(cellValue, """") > 0 Then
cellValue = """" & Replace(cellValue, """", """""") & """"
End If
If col = 1 Then
lineText = cellValue
Else
lineText = lineText & "," & cellValue
End If
Next col
' 空行でない場合のみ書き込み
If Trim(Replace(lineText, ",", "")) <> "" Then
csvFile.WriteLine lineText
End If
Next row
csvFile.Close
' 成功メッセージ
Debug.Print "CSV exported to: " & csvFilePath
End Sub
Sub WriteResultsToSheet(dict As Object)
Dim ws As Worksheet
Dim key As Variant
Dim item As Variant
Dim row As Long
Dim sheetName As String
sheetName = "Redshift_Tables"
' 既存のシートがある場合は削除
On Error Resume Next
Application.DisplayAlerts = False
ThisWorkbook.Worksheets(sheetName).Delete
Application.DisplayAlerts = True
On Error GoTo 0
' 新しいシートを作成
Set ws = ThisWorkbook.Worksheets.Add
ws.Name = sheetName
' ヘッダーを設定
With ws
.Range("A1").Value = "File Name"
.Range("B1").Value = "Datasource Name"
.Range("C1").Value = "Schema"
.Range("D1").Value = "Table Name"
.Range("E1").Value = "Source Type"
.Range("F1").Value = "Full Reference"
' ヘッダーの書式設定
.Range("A1:F1").Font.Bold = True
.Range("A1:F1").Interior.Color = RGB(200, 200, 200)
End With
row = 2
' データを出力
For Each key In dict.Keys
If Left(key, 11) <> "CONNECTION_" Then
item = dict(key)
ws.Cells(row, 1).Value = item(0) ' File Name
ws.Cells(row, 2).Value = item(1) ' Datasource Name
ws.Cells(row, 3).Value = item(2) ' Schema
ws.Cells(row, 4).Value = item(3) ' Table Name
ws.Cells(row, 5).Value = item(4) ' Source Type
ws.Cells(row, 6).Value = item(2) & "." & item(3) ' Full Reference
row = row + 1
End If
Next key
' 接続情報を別のセクションに出力
row = row + 2
ws.Cells(row, 1).Value = "Connection Information"
ws.Range(ws.Cells(row, 1), ws.Cells(row, 6)).Merge
ws.Cells(row, 1).Font.Bold = True
ws.Cells(row, 1).Interior.Color = RGB(200, 200, 200)
row = row + 1
For Each key In dict.Keys
If Left(key, 11) = "CONNECTION_" Then
ws.Cells(row, 1).Value = Replace(Replace(key, "CONNECTION_", ""), "_", " - ")
ws.Cells(row, 2).Value = dict(key)
ws.Range(ws.Cells(row, 2), ws.Cells(row, 6)).Merge
row = row + 1
End If
Next key
' 列幅を自動調整
ws.Columns("A:F").AutoFit
' テーブルとして書式設定
On Error Resume Next
ws.ListObjects.Add(xlSrcRange, ws.Range("A1").CurrentRegion, , xlYes).Name = "RedshiftTables"
On Error GoTo 0
End Sub
' テスト用のサブルーチン
Sub TestExtraction()
ExtractRedshiftTablesFromTWB
End Sub