こちらを実行する準備ができたので、ウォークスルーしていきます。
ステップ1: 環境のセットアップ
%pip install -U -q twelvelabs databricks-vectorsearch
# Restart the Python process so the packages installed above are the ones
# picked up by the imports that follow.
dbutils.library.restartPython()
from twelvelabs import TwelveLabs
import os
# Fetch the API key from Databricks secrets (recommended).
# A secret scope must be set up first, and the API key added to it.
TWELVE_LABS_API_KEY = dbutils.secrets.get(scope="your-scope", key="twelvelabs-api-key")
# Reject an empty value as well as None: a missing secret is expected to make
# dbutils.secrets.get raise on its own, so the realistic failure here is a
# blank key. The key comes from a secret scope, not an environment variable,
# so the message points there.
if not TWELVE_LABS_API_KEY:
    raise ValueError(
        "Twelve Labs API key is empty; store it in the Databricks secret scope"
    )
# Initialize the Twelve Labs client
twelvelabs_client = TwelveLabs(api_key=TWELVE_LABS_API_KEY)
ステップ2: マルチモーダルエンベディングの生成
UDFの定義
from pyspark.sql.functions import pandas_udf
from pyspark.sql.types import ArrayType, FloatType
from twelvelabs.models.embed import EmbeddingsTask
import pandas as pd
@pandas_udf(ArrayType(FloatType()))
def get_video_embeddings(urls: pd.Series) -> pd.Series:
    """Return one Twelve Labs video embedding per URL in ``urls``.

    For every URL an embedding task is created with the
    ``Marengo-retrieval-2.6`` engine, awaited, and retrieved. Only the
    embedding of the first returned segment is kept, so each video maps to
    a single vector.

    Args:
        urls: Batch of video URLs supplied by Spark.

    Returns:
        A series aligned with ``urls`` whose values are the first segment's
        float embedding, or ``None`` when the URL is null or the task
        returned no segments.
    """
    # Build the client once per batch instead of once per URL. It is created
    # inside the UDF (not captured from the notebook scope) so that only the
    # API key string travels with the function.
    client = TwelveLabs(api_key=TWELVE_LABS_API_KEY)

    def first_segment_embedding(video_url):
        # Propagate nulls instead of sending an empty request to the API.
        if pd.isna(video_url):
            return None

        task = client.embed.task.create(
            engine_name="Marengo-retrieval-2.6",
            video_url=video_url,
        )
        # Block until the embedding task has finished processing.
        task.wait_for_done()
        task_result = client.embed.task.retrieve(task.id)

        # Covers both an empty list and a missing (None) result.
        segments = task_result.video_embeddings
        if not segments:
            return None
        return segments[0].embedding.float

    return urls.apply(first_segment_embedding)
動画URLを持つサンプルデータフレームの作成
動作確認します。
# Sample input: a single publicly hosted test clip.
video_urls = ["https://sample-videos.com/video321/mp4/720/big_buck_bunny_720p_2mb.mp4"]
# One single-column row per URL.
url_rows = [(video_url,) for video_url in video_urls]
df = spark.createDataFrame(url_rows, ["video_url"])
ちなみに、動画の内容を確認するためにノートブックに動画を埋め込むことも可能です。
from IPython.display import Video
# Embed the sample clip in the notebook output so its content can be checked.
Video("https://sample-videos.com/video321/mp4/720/big_buck_bunny_720p_2mb.mp4")
エンベディングを生成するためにUDFを適用して結果を表示
# Run the embedding UDF over the URL column and attach the result.
embedding_column = get_video_embeddings(df["video_url"])
df_with_embeddings = df.withColumn("embedding", embedding_column)
# Print full cell contents rather than the default truncated preview.
df_with_embeddings.show(truncate=False)
+----------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|video_url |embedding
*** WARNING: max output size exceeded, skipping output. ***
.029979462, -0.0123899365, -0.0055744634, -0.037173226, 0.032232665, -0.04850397, -0.0016167792, 0.047376946, -0.039089154, 0.04274626, 0.01267932, -0.048392903, 0.045584273, -0.004410955, -0.029655935, -9.784038E-4, -0.0016463452, -0.07463025, -0.013062307, -0.024386011, 0.013101474, 0.001993212, -0.027518196, 0.01511438, -0.06364772, 1.1663671E-4, 0.034992184, 0.0672028, 0.018348949, 0.01770745, 0.037930764, 0.010303554, 0.0017949131, -0.016391346, -0.044297967, 0.010202993, -0.02199695, 0.11376265, -0.010827421, 0.004213572, -0.052402616, 0.005501019, -0.05605638, -0.03627798, -0.013551237, 0.036587983, 0.02379138, -5.7136023E-4, 0.002467874, -0.078592725, 0.078369886, -0.0055490523, -0.01577636, 0.06439924, -0.037139926, -0.07354969, -0.07867986, 0.026089974, -0.0144738825, 0.011421639, -0.007971645, -0.007326717, 0.13272676, -0.040904842, -0.005276445, -0.01808855, 2.8698216E-4, 0.013798602, -0.018425401, 0.018862937, 0.026388913, 0.015586878, -0.015969008, -0.0147435, 0.0022153151, 0.03855804, 0.034329474, -0.03983455, -0.0026194516, 0.0020268906, -0.010093657, -0.0134882545, -0.013830042, -0.022625577, -0.041252874, -0.018028954, 0.06833717, -0.004793218, -0.049187027, -0.06620049, 0.009646389, 0.025857983, 0.03442328, 0.026219007, 0.0026174388, 0.009492891, 0.06875413, 0.04376699, 0.025001785, 0.022415116, 0.028489543, 0.02402294, 0.024360623, 0.031146033, -0.0027573842, -0.040314585, -0.022700105, -0.020321239, -0.028665775, -0.020925378, 0.009680659, -0.026738932, 0.003007026, 0.012681515, 0.035421263, 0.011382212, 0.005099818, -0.063615896, 0.012620723, -0.023630342, 0.028959367, -0.01437283, 0.02259669, -0.056993496, -0.009739823, -0.011462339, -0.009561893, 0.009574517, -0.055388626, 0.07643259, -0.02500566, 0.020038906, 0.031926263, -0.012772667, -0.002938917, -0.0038700106, 0.008011213, -0.006646045, -0.007908108, -0.004766426, -0.00351735, 0.042283133, 0.049898785, -0.065704174, -0.0037304861, -0.01151114, -0.0024640292, 0.004310888, -0.035348907, 
-9.171123E-4, 0.018550886, 0.010107016, -0.02289733, -0.022663245, -0.05894512, -0.025407143, -0.05880697, -0.031622574, -0.0012540758, 0.0037431223, -0.011520247, -0.046826173, 0.0024289924, -0.00740329, -0.08953589, -0.047887046, -0.028516041, 0.026353989, 0.0020365664, 0.02595641, 0.05749229, -0.012663947, -0.025053045, -0.012981865, 0.046354897, 0.006227492, -0.033957247, -0.010253096, 0.07652309, 0.0058225146, 0.060991738, 0.04871023, -0.0332532, 0.04815378, -0.01570799, 0.03566884, -0.045501478, -0.031891678, -0.019189473, -0.03240488, -0.0077978945, 4.9894326E-4, -0.040615343, 0.01757751, -0.00895925, -0.02682785, 0.040195677, 0.027776215, 0.017129159, 0.019830845, -0.009518289, 0.064962566, 0.019490328, 0.036231436, 0.020634476, 0.005467864, 0.057580195, -0.013807259, 0.012161428, -0.006541044, -0.0041659814, -0.0039754347, -0.060644444, 0.017126339, -0.01974226, 0.025122225, 0.10432627, -0.005949118, -0.0048080296, -0.07089896, -0.008770434, -0.013600175, 0.04531312, -0.049193505, -0.016741449, -0.031173008, -0.0025019867, -0.0024958854, -0.02895726, -0.015425376, 0.020596335, 0.002824636, 0.011593557, -0.013473093, 0.04224118, 0.021186974, 0.033365857, 0.002958535, 0.025283583, -0.044646528, -0.0119344555, -0.035654284, 0.023465458, -0.019836476, 0.023688423, -0.0083645545, -0.014283521, -0.0050694784, -0.005806634, -0.08953373, 0.006386916, -0.007508261, 0.0035115809, 0.006901959, 0.011953678, -0.04919674, 0.012631615, -0.0017981332, -0.018585268, 0.00508909, -0.0022484532, -0.0018763348, 0.0037454825, -0.023995152, 0.03636696, 0.023159042, 0.008948886, -0.021473302, 0.00997134, 0.033559818, 0.001980704, 0.024696987, -0.01335462, -0.011318345, -0.0011114654, -0.11100561, -0.07194375, -0.015610289, 0.027786247, 0.03746622, 7.853715E-4, 0.028748885, 0.010888568, -0.03293643, 0.023106314, -0.0075672725, -0.036387347, -0.04217908, -0.03333749, -0.027250111, -0.0117708985, -0.039556615, 0.037336506, 0.021915795, -0.030081682, -0.03598429, -0.020563956, 
-0.012323749, -0.036752712, -0.021762777, 0.010191495, 0.008998425, -0.013376029, 0.0050017727, 0.010853094, -0.04001397, 0.018472767, -0.020190556, 0.030709371, 0.06174421, 0.05149658, -0.0029530926, -0.0053036846, -0.016851818, 0.0032166045, -0.05727647, 0.010107425, 0.035392154, 0.0034632212, 0.042749193, -0.0017062774, 0.009733895, 0.027704388, 0.01631579, -0.005881855, -0.024372414, 0.029140119, -0.015830375, 0.018877367, -0.010800196, 0.018390927, 0.04814988, -0.004078993, 0.017940816, 0.041012403, 0.006339234, -0.015543695, -0.006418781, 0.005841562, 0.009694471, -0.032982863, -0.016252723, 1.6219822E-4, -0.026791336, 0.027037332, -0.015306181, 0.023582183, -0.021819513, 0.026548717, 0.03711314, -0.011760128, -1.271148E-4, 0.0150151355, -0.03303953, 0.04133108, -0.027027242, 0.04966926, -0.002690468, -0.030231435, -0.003955537, -0.011522204, 0.014775016, 0.009684368, 0.0071682744, 0.008235005, 0.0016355093, 0.05628964, 0.05475489, 0.015920088, -0.0071062213, -0.0015886498, 0.02544889, -0.067594215, 0.025111146, 0.011640886, -0.01910595, 0.024285156, -0.01585235, 0.040026307, -0.07696258, -0.018167432, 3.9994437E-5, 0.04627793, 0.014189968, 0.04098593, 0.051961217, -0.032129128, 0.008191405, 0.011671172, -0.018879263, -0.019578215, 0.014174741, -0.038835026, 0.006707622, -0.011135283, 0.01221356, 0.029053112, -0.021440722, -0.044847988, -0.022430168, 0.029739657, 0.00195628, 0.008681817, 0.021279486, -0.0015969306, 0.009241477, 0.03292187, -0.0426318, 0.019670224, -0.018986152, -0.048227575, 0.017335715, 0.014540468, 0.04839645, -0.037581444, -0.03019634, -0.03865438, 0.01231668, -0.011397396, -0.08061027, -0.07435432, -0.018448962, -0.03673627, -0.022009648, -0.011198546, -0.0040190853, 0.035860296, -0.046089903, -0.033202916, 0.020630464, 0.016672404, -0.032497652, -0.0037439202, 0.0042082765, 0.0016541722, 0.012422817, 0.007048867, 0.0155687975, -0.031261258, -0.014674201, -0.04801593, -0.014110611, 0.019653302, 0.012805023, 0.071879804, 0.013774836, 
0.033548422, -0.016708776, -0.013556407, -0.026897974, 0.01436472, 0.009483182, -0.005683514, 0.029322535, -0.050043557, -0.04793329, 0.06087792, -0.062790245, -0.013978833, 0.0099653695, 0.0046472526, 0.0094364835, 0.020706052, 0.047697205, -0.032865364, 0.005339946, -0.020726513, -0.021009311, 0.011081528, 0.05469637, 0.012156873, 0.045223463, 0.0014587771, -0.011124406, -0.02268358, 0.019706791, 0.025472473, -2.9468052E-5, -0.013040956, -0.032629672, 0.031776782, 0.0017764133, -0.019824488, 0.03419612, 0.037325967, 0.032718237, 0.035823535, -0.027880378, -0.005077116, -0.018756319, -0.03115097, -0.027171759, 0.066228785, -0.013171057, 0.026810985, -0.0337985, 0.013006288, 0.0059979064, -0.012463553, 0.0142834, 0.010552169, 8.486406E-4, -3.104765E-5, 0.01731959, -0.04765351, -0.008414988, -0.019625656, -0.031051427, 0.08809011, 0.03498049, 0.028374258, 0.014493979, -0.048333466, -0.0127950655, -0.009906523, -0.04418479, 0.02338317, 0.013540955, -0.025749601, -0.001509237, 0.004667368, -0.010027624, -0.09683784, -0.0074271015, 0.031031735, 0.06552573, 0.0011700807, 0.027665097, 0.0025431935, -0.010171209, -0.03302758, -0.049705345, -5.498426E-4, -0.019176159, 0.010525753, 0.019365923, -0.025372038, -0.0262662, -0.06266142, 0.015043414, 0.019883644, -0.05477573, -0.013528351, 0.060502782, 0.0016651365, -0.02587618, -0.012565702, -0.0085338, -0.034005895, -0.030409258, 0.0129748965, 0.0174038, -0.0072504054, 0.0035223616, 0.010577307, 0.044128217, 0.0141861085, 0.026956914, -0.023149395, -0.046100385, -0.004252346, -0.019334903, 0.011459365, -0.022140006, -0.018958053, 0.00767007, 9.469797E-5, -0.034521237, -0.0029992084, 0.017002644, -0.03857146, 0.018912727, -0.09876343, 0.019118682, 0.013727099, -0.025606116, 0.006697321, 0.016532153, -0.06600049, -0.022878097, -0.012504256, -0.010020266, 0.0018482135, 0.035317346, 0.023673993, 0.040592246, -0.016732255, 0.0454684, -0.017790614, -0.0085586775, 0.026683826, 0.013783821, 0.053053964, -0.008992355, 0.030399311, 
-0.020832758, -0.022222932, 0.034638535, 0.011332447, 0.017604848, -0.028097544, -0.06297504, 0.014055168, 0.0067813946, 0.011903417, 0.0060141636, 0.015707646, 0.0342147, 0.04133136, -0.026782503, -0.0021297182, 0.021704596, 0.0067510046, 0.049061943, -0.017108439, -0.018697355, 0.046567168, 0.033586267, -0.03464465, 0.011004962, 0.030477911, 0.04260684, 0.02525125, 0.003433386, -0.0068764673, 0.061720237, -0.008662938, 2.0904119E-4, -0.022943163, -0.005221573, -0.0053868224, -0.087346174, 0.026180565, -0.066882506, -0.043699328, -0.03379622, -0.01599642, -0.015535398, 0.03331518, 0.012356143, 0.0036131523, 0.0097709345, 0.0034276696, 0.04864022, 0.036749244, -0.036260612, -0.004460548, 6.129405E-4, 0.03265899, -0.0072730454, -0.08008531, -0.005295228, 0.015160056, 0.019982748, 0.03755122, -0.008657462, -0.061977796, 0.048145417, 0.029669765, -9.3363423E-4, 0.021445815, 0.004024409, -0.016229128, -0.004541855, 0.023112899, 0.039444055, -0.0069418466, 0.036537692, -5.684492E-4, 0.01649976, -0.007558249, -0.0016717192, 0.026119782, -0.015853805, -0.007098712, 0.0032465474, -0.0572105, -0.02998347, -0.007961127, 0.020138677, 0.02082604, 0.036894556, 0.016602177, 0.00528833, 0.038105793, 0.0059311558, 0.004846056, -0.006694053, -0.029141342, -0.029799981, -0.04140565, -0.016081609, 0.00789425, -0.027987724, 0.0018632964, 0.0016470893, -0.003178147, 0.0029932358, -0.044273306, 0.009340824, -0.089985475, 0.04410994, -0.0061103003, 0.015524098, -0.021418516, 0.003967481, 0.0336457, 0.047461215, -0.014298582, -0.0031822163, -0.0062466552, -0.006869331, -0.02357861, -0.012142815, -0.011450043, -0.027151404, -0.0066985073, 0.022578882, 0.023353761, 0.016418545, 0.0024721217, -0.011998047, -0.0015260611, -0.04206061, 0.00632177, -0.024579424, -0.011868283, -0.03499283, -0.006257664, 0.0391778, 0.016695738, 0.014413746, -0.030029336, 0.0053191343, 0.022528451, -0.031708412, 0.025951825, -0.03107221, 0.03250863, 0.0067419275, 0.004386547, -0.014997875, -0.0143616125, 
0.061790902, 0.024747757, 0.009407722, -0.012400008, -0.019415641, -0.02699906, -0.018273393, -0.0034985887, -0.0027711177, -0.016569616, -0.04723437, -0.04612775, 0.013592889, 0.12101954, 0.0021926097, 0.022482758, 0.082998626, -0.00877125, 0.001825291, 0.052395027, -0.0204819, -0.033035766, 0.042189624, -0.008924477, -0.021261735, -0.0033142276, 0.00649192, -0.008119222, -0.023724996, 0.035766765, 0.00929841, 0.0029688273, -0.022010954, 0.054030456, 0.026407681, 0.040694404, 0.03257478, -0.007874122, 0.015783455, -0.09610485, -0.02426855, -0.076495945, -0.016188657, 0.026636012, 0.01572097, 0.028501688, -0.020508016, 0.023900539, 0.006957213, 0.028514054, -0.00770535, -0.026031202, 0.019580295, 0.0080894185, -0.058947135, -0.038718473, -0.031121874, -0.07335459, 0.019822553, 0.013191584, 0.015294253, -0.023202296, -0.029736523, -0.0038419992, 0.018930998, 0.021007804, -0.04660817, -0.009053059, 0.03631448, -0.017124599, 0.004365605, -0.051075947, -0.012290755, 0.07362776, 0.047026146, 0.003133233, -0.0045102932, 0.046650674, 0.024756335, -0.006512382, -0.075702764, -0.0015884327, 0.013005683, -0.0050416673, 0.012466058, -3.1125703E-4, 0.011988929, -0.012687706, 0.004293034, 0.052573267]|
+----------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
ステップ3: 動画エンベディングのためのDeltaテーブルの作成
from pyspark.sql import Row
# Sample videos to embed: each row pairs a public video URL with a display title.
video_data = [
    Row(url=url, title=title)
    for url, title in (
        ('http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ElephantsDream.mp4', 'Elephant Dream'),
        ('http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/Sintel.mp4', 'Sintel'),
        ('http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4', 'Big Buck Bunny'),
    )
]

# Build a Spark DataFrame from the rows and preview it.
source_df = spark.createDataFrame(video_data)
source_df.show()
+--------------------+--------------+
|                 url|         title|
+--------------------+--------------+
|http://commondata...|Elephant Dream|
|http://commondata...|        Sintel|
|http://commondata...|Big Buck Bunny|
+--------------------+--------------+
%sql
-- Delta table that stores each video's URL, title and embedding vector.
CREATE TABLE IF NOT EXISTS users.takaaki_yayoi.videos_source_embeddings (
-- Identity column: a value is generated automatically when a row is inserted without one.
id BIGINT GENERATED BY DEFAULT AS IDENTITY,
-- Location of the source video.
url STRING,
-- Human-readable title of the video.
title STRING,
-- Embedding vector produced for the video by get_video_embeddings.
embedding ARRAY<FLOAT>
-- Change Data Feed is turned on because this table is used below as the source of a
-- Delta Sync vector index (NOTE(review): Databricks documents CDF as a prerequisite; confirm).
) TBLPROPERTIES (delta.enableChangeDataFeed = true);
# Add an `embedding` column by running the embedding UDF over every video URL.
# The DataFrame is lazy: the UDF (and its Twelve Labs API calls) only runs when
# an action such as the write below is executed.
embeddings_df = source_df.withColumn("embedding", get_video_embeddings("url"))

# Append the embedded rows to the Delta table that backs the vector index.
embeddings_df.write.mode("append").saveAsTable("users.takaaki_yayoi.videos_source_embeddings")

# Show what was persisted rather than `embeddings_df` itself: displaying the
# lazy DataFrame would re-evaluate the UDF and embed every video a second time.
display(spark.table("users.takaaki_yayoi.videos_source_embeddings"))
ステップ4: Mosaic AI Vector Searchの設定
ここでは、既存のベクトルサーチエンドポイントを使っているので、エンドポイントの作成は行っていません。
from databricks.vector_search.client import VectorSearchClient
# Names of the objects involved: the existing Vector Search endpoint, the Delta
# table holding the embeddings, and the index to create from that table.
endpoint_name = "dbdemos_vs_endpoint"
source_table_name = "users.takaaki_yayoi.videos_source_embeddings"
index_name = "users.takaaki_yayoi.video_embeddings_index"

# Client used for the Vector Search calls in this notebook.
mosaic_client = VectorSearchClient()

# Create a Delta Sync index over the table's `embedding` column, keyed by `id`.
# The dimension is 1024; the query embeddings printed later in this notebook
# have the same length.
index = mosaic_client.create_delta_sync_index(
    index_name=index_name,
    source_table_name=source_table_name,
    endpoint_name=endpoint_name,
    pipeline_type="TRIGGERED",
    primary_key="id",
    embedding_vector_column="embedding",
    embedding_dimension=1024,
)
print(f"Created index: {index.name}")
Created index: users.takaaki_yayoi.video_embeddings_index
元のソースでは同期を手動でトリガーしていますが、ベクトルインデックスを作成すると初回の同期がトリガーされるのでスキップしています。
# Look the index up again to check on it. It may take some time before the
# index becomes usable.
index_status = mosaic_client.get_index(endpoint_name=endpoint_name, index_name=index_name)
print(f"Index status: {index_status}")
しばらくすると、ベクトルインデックスを利用できるようになります。
ステップ5: 類似検索の実装
テキストのクエリーのエンベディングを取得する関数です。
def get_text_embedding(text_query):
    """Return the embedding vector for a text query.

    The text is sent to the Twelve Labs Embed API through the module-level
    ``twelvelabs_client``, using the ``Marengo-retrieval-2.6`` engine with
    ``text_truncate="start"``.

    Args:
        text_query: Query text to embed.

    Returns:
        The ``text_embedding.float`` value of the API response. Callers in
        this notebook take ``len()`` of it and pass it on as a query vector.
    """
    response = twelvelabs_client.embed.create(
        text=text_query,
        engine_name="Marengo-retrieval-2.6",
        text_truncate="start",
    )
    embedding = response.text_embedding
    return embedding.float
類似検索を行う関数です。
def similarity_search(query_text, num_results=5):
    """Search the video index for entries similar to a text query.

    The query text is embedded with ``get_text_embedding`` and the resulting
    vector is looked up in the module-level ``index``. No Vector Search
    client is created here: the search goes through ``index`` directly, so a
    per-call client would never be used.

    Args:
        query_text: Text describing what to look for.
        num_results: Maximum number of matches to request from the index.

    Returns:
        The raw response of ``index.similarity_search`` for the columns
        ``id``, ``url`` and ``title``. Pass it to ``parse_search_results``
        to get a list of dicts.
    """
    query_embedding = get_text_embedding(query_text)
    print(f"Query embedding generated: {len(query_embedding)} dimensions")

    return index.similarity_search(
        query_vector=query_embedding,
        num_results=num_results,
        columns=["id", "url", "title"],
    )
検索結果をパース、表示するヘルパー関数です。
def parse_search_results(raw_results):
    """Convert a raw similarity-search response into a list of row dicts.

    The response is expected to be a mapping that holds the column
    descriptors under ``raw_results['manifest']['columns']`` (each with a
    ``'name'`` key) and the rows under ``raw_results['result']['data_array']``.
    Each row is zipped with the column names in order.

    Args:
        raw_results: Response returned by ``index.similarity_search``.

    Returns:
        One dict per row, mapping column name to value. An empty list is
        returned, after printing the offending response, when the response
        does not have the expected structure.
    """
    try:
        rows = raw_results['result']['data_array']
        column_names = [column['name'] for column in raw_results['manifest']['columns']]
        return [dict(zip(column_names, row)) for row in rows]
    except (KeyError, TypeError):
        # KeyError: an expected key is missing. TypeError: the response or one
        # of its nested parts is None or otherwise not subscriptable/iterable.
        print("Unexpected result format:", raw_results)
        return []
動作確認します。
# Usage example: run a text query against the video index.
query = "A dragon"
raw_results = similarity_search(query)

# Parse the response and print the matches in the order they were returned.
search_results = parse_search_results(raw_results)
if not search_results:
    print("No valid search results returned.")
else:
    print(f"Top {len(search_results)} videos similar to the query: '{query}'")
    for rank, hit in enumerate(search_results, start=1):
        print(f"{rank}. Title: {hit.get('title', 'N/A')}, URL: {hit.get('url', 'N/A')}, Similarity Score: {hit.get('score', 'N/A')}")
動きました。
[NOTICE] Using a notebook authentication token. Recommended for development only. For improved performance, please use Service Principal based authentication. To disable this message, pass disable_notice=True to VectorSearchClient().
Query embedding generated: 1024 dimensions
Top 3 videos similar to the query: 'A dragon'
1. Title: Elephant Dream, URL: http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ElephantsDream.mp4, Similarity Score: 0.38943937
2. Title: Sintel, URL: http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/Sintel.mp4, Similarity Score: 0.381103
3. Title: Big Buck Bunny, URL: http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4, Similarity Score: 0.3759887
ステップ6: 動画推薦システムの構築
動画のレコメンデーションを行う関数です。
def get_video_recommendations(video_id, num_recommendations=5):
    """Recommend videos whose embeddings are closest to a given video's.

    The stored embedding of ``video_id`` is read from the source table and
    used as the query vector for a similarity search on the module-level
    ``index``. The input video itself is removed from the results.

    Args:
        video_id: Value of the ``id`` column of the video to start from.
        num_recommendations: Maximum number of recommendations to return.

    Returns:
        A list of at most ``num_recommendations`` dicts as produced by
        ``parse_search_results``. An empty list is returned, after printing a
        message, when the video or its embedding is missing or when the
        search fails.
    """
    # Fetch the stored embedding for the requested video. The filter is a
    # Column expression rather than an interpolated SQL string, so `video_id`
    # is always treated as a value and never parsed as SQL.
    videos = spark.table(source_table_name)
    row = videos.filter(videos["id"] == video_id).select("embedding").first()
    if row is None:
        print(f"No video found with id: {video_id}")
        return []

    query_vector = row["embedding"]
    if query_vector is None:
        # The row exists but has no embedding, so there is nothing to search with.
        print(f"No embedding stored for video id: {video_id}")
        return []

    # Best-effort lookup: any failure is reported and yields no recommendations.
    try:
        results = index.similarity_search(
            query_vector=query_vector,
            num_results=num_recommendations + 1,  # +1 leaves room for the input video
            columns=["id", "url", "title"],
        )
        candidates = parse_search_results(results)
        # Drop the input video from the recommendations, if it was returned.
        recommendations = [c for c in candidates if c.get('id') != video_id]
        return recommendations[:num_recommendations]
    except Exception as e:
        print(f"Error during recommendation: {e}")
        return []
# Helper that prints recommendations in a readable form
def display_recommendations(recommendations):
    """Print a numbered list of recommended videos.

    Args:
        recommendations: Sequence of dicts. The ``title``, ``url`` and
            ``score`` entries are printed, with ``N/A`` for missing keys.

    Returns:
        None. A header and one entry per video are printed, or a single
        notice when ``recommendations`` is empty.
    """
    if not recommendations:
        print("No recommendations found.")
        return

    print(f"Top {len(recommendations)} recommended videos:")
    for position, video in enumerate(recommendations, start=1):
        title = video.get('title', 'N/A')
        url = video.get('url', 'N/A')
        score = video.get('score', 'N/A')
        print(f"{position}. Title: {title}")
        print(f" URL: {url}")
        print(f" Similarity Score: {score}")
        print()
# Usage example: recommend videos similar to the one stored under this id.
# The id is assumed to be a valid video id in the dataset.
video_id = 2
recommendations = get_video_recommendations(video_id=video_id)
display_recommendations(recommendations)
[NOTICE] Using a notebook authentication token. Recommended for development only. For improved performance, please use Service Principal based authentication. To disable this message, pass disable_notice=True to VectorSearchClient().
Top 2 recommended videos:
1. Title: Sintel
URL: http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/Sintel.mp4
Similarity Score: 0.49169502
2. Title: Big Buck Bunny
URL: http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4
Similarity Score: 0.40925112
次のステップはこちらに記載されていますが、個人的にはURLではなくボリュームに格納されている動画に対して動作するようにしたいところです。