LoginSignup
0
0

Databricks にて Azure SQL Dataabse のデータ型と対応する Spark Dataframe のデータ型の調査方法

Posted at

概要

Databricks にて Azure SQL Dataabse のデータ型と対応する Spark Dataframe のデータ型の調査方法を共有します。Azure SQL Database にて一時テーブルを作成し、そのテーブルから Spark Dataframe を作成する方法を実施することで容易に確認できます。下記が Spark Dataframe のデータ型です。float(24) と float(25) で Spark のデータ型が変わるなどの仕様があり、すべてのデータ型の調査しているわけではないです。今後の調査を継続的に実施するために実施手順を共有します。

root
 |-- bit_col: boolean (nullable = true)
 |-- tinyint_col: integer (nullable = true)
 |-- smallint_col: short (nullable = true)
 |-- int_col: integer (nullable = true)
 |-- bigint_col: long (nullable = true)
 |-- numeric_col: decimal(18,0) (nullable = true)
 |-- smallmoney_col: decimal(10,4) (nullable = true)
 |-- money_col: decimal(19,4) (nullable = true)
 |-- decimal_col: decimal(18,6) (nullable = true)
 |-- float_24_col: float (nullable = true)
 |-- float_25_col: double (nullable = true)
 |-- float_53_col: double (nullable = true)
 |-- real_col: float (nullable = true)
 |-- char_col: string (nullable = true)
 |-- varchar_col: string (nullable = true)
 |-- text_col: string (nullable = true)
 |-- nchar_col: string (nullable = true)
 |-- nvarchar_col: string (nullable = true)
 |-- ntext_col: string (nullable = true)
 |-- date_col: date (nullable = true)
 |-- datetimeoffset_col: string (nullable = true)
 |-- datetime_col: timestamp (nullable = true)
 |-- datetime2_col: timestamp (nullable = true)
 |-- time_col: timestamp (nullable = true)
 |-- binary_col: binary (nullable = true)
 |-- varbinary_col: binary (nullable = true)
 |-- image_col: binary (nullable = true)
 |-- varchar_max_col: string (nullable = true)
 |-- nvarchar_max_col: string (nullable = true)
 |-- varbinary_max_col: binary (nullable = true)
 |-- hierarchyid_col: binary (nullable = true)
 |-- xml_col: string (nullable = true)
 |-- uniqueidentifier_col: string (nullable = true)

検証方法

Azure SQL Database の接続情報を定義

server = "ads-01"  ## サーバー名
database = "ads-01"  ## データベース名
username = dbutils.secrets.get(scope="qiita", key="sql_authentificate_user")  ## Azure SQL DB のユーザー名
password = dbutils.secrets.get(scope="qiita", key="sql_authentificate_password")  ## Azure SQL DB のパスワード

image.png

Azure SQL Database にて一時テーブルを作成後に Spark Dataframe を作成

# CREATE TABLE の実行
sql = """
IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'#create_tble_01') AND type in (N'U'))
	DROP TABLE #create_tble_01
;
CREATE TABLE #create_tble_01
(    
    -- 正確な数値型 (Exact numerics)
    bit_col BIT,
    tinyint_col TINYINT,
    smallint_col SMALLINT,
    int_col INT,
    bigint_col BIGINT,
    numeric_col NUMERIC(18, 0),
    smallmoney_col SMALLMONEY,
    money_col MONEY,
    decimal_col DECIMAL(18, 6),

    -- 近似数値型 (Approximate numerics):
    float_24_col FLOAT(24),
    float_25_col FLOAT(25),
    float_53_col FLOAT(53),
    real_col real,

    -- 文字列型 (Character strings):
    char_col CHAR(10),
    varchar_col VARCHAR(100),
    text_col text,

    -- Unicode 文字列型 (Unicode character strings)
    nchar_col NCHAR(10),
    nvarchar_col NVARCHAR(100),
    ntext_col ntext,

    -- 日付と時刻型 (Date and time):
    date_col DATE,
    datetimeoffset_col DATETIMEOFFSET,
    datetime_col DATETIME,
    datetime2_col DATETIME2,
    time_col TIME,

    -- バイナリ文字列型 (Binary strings):
    binary_col BINARY(50),
    varbinary_col VARBINARY(50),
    image_col IMAGE,

    -- 大きな値のデータ型 (Large value data types)
    varchar_max_col VARCHAR(MAX),
    nvarchar_max_col NVARCHAR(MAX),

    -- 大きなオブジェクトのデータ型 (Large object data types)
    varbinary_max_col VARBINARY(MAX),

    -- その他のデータ型 (Other data types):
    hierarchyid_col HIERARCHYID,
    xml_col XML,
    uniqueidentifier_col UNIQUEIDENTIFIER NOT NULL PRIMARY KEY DEFAULT NEWID()
);
"""

df = (
    spark.read.format("sqlserver")
    .option("host", f"{server}.database.windows.net")
    .option("user", username)
    .option("password", password)
    .option("database", database)
    .option("prepareQuery", sql)
    .option("dbTable", "#create_tble_01")
    .load()
)

df.printSchema()
root
 |-- bit_col: boolean (nullable = true)
 |-- tinyint_col: integer (nullable = true)
 |-- smallint_col: short (nullable = true)
 |-- int_col: integer (nullable = true)
 |-- bigint_col: long (nullable = true)
 |-- numeric_col: decimal(18,0) (nullable = true)
 |-- smallmoney_col: decimal(10,4) (nullable = true)
 |-- money_col: decimal(19,4) (nullable = true)
 |-- decimal_col: decimal(18,6) (nullable = true)
 |-- float_24_col: float (nullable = true)
 |-- float_25_col: double (nullable = true)
 |-- float_53_col: double (nullable = true)
 |-- real_col: float (nullable = true)
 |-- char_col: string (nullable = true)
 |-- varchar_col: string (nullable = true)
 |-- text_col: string (nullable = true)
 |-- nchar_col: string (nullable = true)
 |-- nvarchar_col: string (nullable = true)
 |-- ntext_col: string (nullable = true)
 |-- date_col: date (nullable = true)
 |-- datetimeoffset_col: string (nullable = true)
 |-- datetime_col: timestamp (nullable = true)
 |-- datetime2_col: timestamp (nullable = true)
 |-- time_col: timestamp (nullable = true)
 |-- binary_col: binary (nullable = true)
 |-- varbinary_col: binary (nullable = true)
 |-- image_col: binary (nullable = true)
 |-- varchar_max_col: string (nullable = true)
 |-- nvarchar_max_col: string (nullable = true)
 |-- varbinary_max_col: binary (nullable = true)
 |-- hierarchyid_col: binary (nullable = true)
 |-- xml_col: string (nullable = true)
 |-- uniqueidentifier_col: string (nullable = true)

image.png

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0