[MLOps] VertexAI 3 (AutoML)

Using AutoML in Vertex AI

Vertex AI AutoML Image Classification Pipeline Walkthrough

In this post, we use Vertex AI + Kubeflow Pipelines to build a single pipeline that runs
image dataset creation → AutoML training → endpoint creation → model deployment
end to end.

from dotenv import load_dotenv
import os

# Load PROJECT_ID and REGION from a local env file
load_dotenv(dotenv_path="project.env")

PROJECT_ID = os.getenv("PROJECT_ID")
REGION = os.getenv("REGION")
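
For reference, project.env is a plain key=value file. The values below are placeholders, not the actual project used in this post:

# project.env (placeholder values)
PROJECT_ID=your-project-id
REGION=us-central1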
# Set the active gcloud project
! gcloud config set project {PROJECT_ID}
Updated property [core/project].
BUCKET_URI = f"gs://practice-{PROJECT_ID}"
! gsutil mb -l {REGION} -p {PROJECT_ID} {BUCKET_URI}
Creating gs://practice-atomic-marking-482405-a5/...
ServiceException: 409 A Cloud Storage bucket named 'practice-atomic-marking-482405-a5' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.
# Parse the active account from `gcloud auth list`
# (the account row sits at index 2; the "*" marks the active one)
shell_output = !gcloud auth list 2>/dev/null
SERVICE_ACCOUNT = shell_output[2].replace("*", "").strip()
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectCreator {BUCKET_URI}
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectViewer {BUCKET_URI}
No changes made to gs://practice-atomic-marking-482405-a5/
No changes made to gs://practice-atomic-marking-482405-a5/

Full Pipeline Overview

This pipeline automatically performs the following steps.

  1. Create an image dataset
  2. Train an AutoML image classification model
  3. Create an endpoint
  4. Deploy the model (online serving)

Reference: Building a pipeline on GCP

from typing import Any, Dict, List

import google.cloud.aiplatform as aip
import kfp
from kfp.v2 import compiler

import random
import string
PIPELINE_ROOT = f"{BUCKET_URI}/pipeline_root/automl_image_classification"
aip.init(project=PROJECT_ID, staging_bucket=BUCKET_URI)
@kfp.dsl.pipeline(name="automl-flower-clf")
def pipeline(project: str = PROJECT_ID, region: str = REGION):
    # AutoML image training job component
    from google_cloud_pipeline_components.v1.automl.training_job import (
        AutoMLImageTrainingJobRunOp
    )

    # Image dataset creation component
    from google_cloud_pipeline_components.v1.dataset import (
        ImageDatasetCreateOp
    )

    # Endpoint creation and model deployment components
    from google_cloud_pipeline_components.v1.endpoint import (
        EndpointCreateOp,
        ModelDeployOp
    )

    # 1️⃣ Create the dataset for image classification
    ds_op = ImageDatasetCreateOp(
        project=project,
        location=region,
        display_name="flowers_dataset",
        # CSV listing image paths and their labels
        gcs_source="gs://cloud-samples-data/vision/automl_classification/flowers/all_data_v2.csv",
        # single-label image classification schema
        import_schema_uri=aip.schema.dataset.ioformat.image.single_label_classification,
    )

    # 2️⃣ Run the AutoML image classification training job
    training_job_run_op = AutoMLImageTrainingJobRunOp(
        project=project,
        location=region,
        display_name="automl-flower-clf",
        prediction_type="classification",  # classification task
        model_type="CLOUD",                 # Cloud-hosted AutoML model
        dataset=ds_op.outputs["dataset"],   # dataset created above
        model_display_name="automl-flower-clf",
        training_fraction_split=0.6,        # 60% training data
        validation_fraction_split=0.2,     # 20% validation data
        test_fraction_split=0.2,           # 20% test data
        budget_milli_node_hours=9000,       # training budget (9,000 milli node hours = 9 node hours)
    )

    # 3️⃣ Create an endpoint for model serving
    endpoint_op = EndpointCreateOp(
        project=project,
        location=region,
        display_name="automl-flower-clf",
    )

    # 4️⃣ Deploy the trained model to the endpoint
    ModelDeployOp(
        model=training_job_run_op.outputs["model"],
        endpoint=endpoint_op.outputs["endpoint"],
        automatic_resources_min_replica_count=1,
        automatic_resources_max_replica_count=1,
    )
compiler.Compiler().compile(
    pipeline_func=pipeline, package_path="automl_image_classification_pipeline.yaml"
)
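
The compiled output is a plain pipeline-spec file, so it is easy to sanity-check before submitting. A minimal sketch:

# Quick sanity check: print the first lines of the compiled spec
with open("automl_image_classification_pipeline.yaml") as f:
    for _ in range(5):
        print(f.readline().rstrip())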
# Random 8-char suffix so repeated runs get unique display names
UUID = "".join(random.choices(string.ascii_lowercase + string.digits, k=8))
DISPLAY_NAME = "flowers_automl_" + UUID

job = aip.PipelineJob(
    display_name=DISPLAY_NAME,
    template_path="automl_image_classification_pipeline.yaml",
    pipeline_root=PIPELINE_ROOT,
    enable_caching=False,
)

job.run()
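
Note that job.run() blocks until the pipeline finishes. If you would rather launch the run and return immediately, the SDK also provides submit():

# Non-blocking alternative to job.run(): launch and return immediately
job.submit()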

(Pipeline graph as rendered in the Vertex AI console)

Inspecting and Controlling the Pipeline with the SDK

def get_task_detail(
    task_details: List[Any], task_name: str
) -> Any:
    # Return the first task detail whose task_name matches (None if absent)
    for task_detail in task_details:
        if task_detail.task_name == task_name:
            return task_detail

Checking pipeline details

pipeline_task_details = (
    job.gca_resource.job_detail.task_details
)
pipeline_task_details
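
Each entry exposes a task_name, which is what get_task_detail matches on. A quick way to see which names are available in this run:

# Task names produced by this run (e.g. "endpoint-create", "model-upload")
[task.task_name for task in pipeline_task_details]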

Checking the endpoint

endpoint_task = get_task_detail(pipeline_task_details, "endpoint-create")
endpoint_resourceName = (
    endpoint_task.outputs["endpoint"].artifacts[0].metadata["resourceName"]
)
endpoint = aip.Endpoint(endpoint_resourceName)
endpoint
<google.cloud.aiplatform.models.Endpoint object at 0x7fd7fe876bc0> 
resource name: projects/374561168874/locations/us-central1/endpoints/7030909567185715200
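
With the Endpoint object in hand, you can send online predictions. The sketch below assumes a local flower image named test.jpg (a hypothetical path) and uses the standard AutoML image payload: a base64-encoded "content" instance plus confidenceThreshold/maxPredictions parameters. Adjust the path and thresholds to taste.

import base64

# Read and base64-encode a local test image (hypothetical path)
with open("test.jpg", "rb") as f:
    content = base64.b64encode(f.read()).decode("utf-8")

# AutoML image classification expects {"content": <b64>} instances
prediction = endpoint.predict(
    instances=[{"content": content}],
    parameters={"confidenceThreshold": 0.5, "maxPredictions": 5},
)
print(prediction.predictions)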

Undeploying and deleting the endpoint

endpoint.undeploy_all()  # an endpoint with live deployments cannot be deleted
endpoint.delete()
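
As far as I know, the SDK can also do both in one step by passing force=True to delete(), which undeploys any deployed models first:

# One-step alternative: force-delete undeploys models before deleting
endpoint.delete(force=True)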

Removing the model created by the pipeline

model_task = get_task_detail(pipeline_task_details, "model-upload")
model_resourceName = model_task.outputs["model"].artifacts[0].metadata["resourceName"]
model = aip.Model(model_resourceName)
model.delete()

Removing the pipeline job

job.delete()
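
Optionally, if the practice bucket is no longer needed, it can be deleted as well. Careful: this removes the bucket and everything in it.

# Optional cleanup: delete the practice bucket and all of its contents
! gsutil -m rm -r {BUCKET_URI}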
This post is licensed under CC BY 4.0 by the author.