racerX
racerX

Reputation: 1092

Error in Cloud Scheduler Job Created From Java SDK

I have a custom training job that I run on a fixed schedule using Cloud Scheduler. When I create the scheduler job using either the Python client or the GCP console, the job runs fine. However, when I create the Cloud Scheduler job using the Java SDK, the job gets created but every run fails. The summary of the error message I get in Cloud Logging is:

{"@type":"type.googleapis.com/google.cloud.scheduler.logging.AttemptFinished", "jobName":"projects/{my_project_id}/locations/us-central1/jobs/java_job", "status":"INVALID_ARGUMENT", "targetType":"HTTP", "url":"https://us-central1-aiplatform.googleapis.com/v1/projects/{my_project_id}/locations/us-central1/customJobs"}

I looked at the jobs in the GCP console: all fields for the three jobs (the one created using the Python client, the one created using the Java SDK, and the one created directly in the console) appear to be the same. I cannot figure out why the job created using the Java SDK keeps failing.

Java SDK code:

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

import com.google.cloud.scheduler.v1.Job;
import com.google.cloud.scheduler.v1.LocationName;
import com.google.cloud.scheduler.v1.OAuthToken;
import com.google.protobuf.ByteString;
import com.google.cloud.scheduler.v1.CloudSchedulerClient;
import com.google.cloud.scheduler.v1.HttpMethod;
import com.google.cloud.scheduler.v1.HttpTarget;


/**
 * Creates a Cloud Scheduler job whose HTTP target POSTs a CustomJob
 * definition to the Vertex AI {@code customJobs} endpoint.
 *
 * <p>The static fields below are deployment-specific settings; the
 * {@code "..."} values are placeholders that must be filled in.
 */
public class Temp
{

    static String projectId = "...";
    static String location = "...";
    static String serviceAccountEmail = "[email protected]";
    static String outputUriPrefix = "gs://.../.../";
    static String imageUri = String.format("%s-docker.pkg.dev/%s/.../...", location, projectId);

    static String trainingJobName = "custom_training_job";
    static String schedulerJobName = String.format("projects/%s/locations/%s/jobs/java_job", projectId, location);
    static String scope = "https://www.googleapis.com/auth/cloud-platform";
    static String httpTargetUri = String.format("https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/%s/customJobs",
            location, projectId, location);
    static String machineType = "n1-standard-4";
    static long replicaCount = 1;


    /**
     * Builds the JSON request body that the scheduler job sends to the
     * {@code customJobs} endpoint.
     *
     * <p>Resulting layout:
     * <pre>
     * {
     *   "display_name": ...,
     *   "job_spec": {
     *     "worker_pool_specs": [ { replica_count, machine_spec, container_spec } ],
     *     "base_output_directory": { "output_uri_prefix": ... }
     *   }
     * }
     * </pre>
     *
     * @return the serialized JSON body
     * @throws JSONException if a value cannot be stored in the JSON tree
     */
    static String getJobBody() throws JSONException {
        JSONObject machineSpec = new JSONObject();
        machineSpec.put("machine_type", machineType);

        JSONArray containerArgs = new JSONArray();
        containerArgs.put("--msg=hello!");

        JSONObject containerSpec = new JSONObject();
        containerSpec.put("image_uri", imageUri);
        containerSpec.put("args", containerArgs);

        JSONObject workerPoolSpec = new JSONObject();
        workerPoolSpec.put("replica_count", replicaCount);
        workerPoolSpec.put("machine_spec", machineSpec);
        workerPoolSpec.put("container_spec", containerSpec);

        JSONArray workerPoolSpecs = new JSONArray();
        workerPoolSpecs.put(workerPoolSpec);

        JSONObject baseOutputDirectory = new JSONObject();
        baseOutputDirectory.put("output_uri_prefix", outputUriPrefix);

        JSONObject jobSpec = new JSONObject();
        jobSpec.put("worker_pool_specs", workerPoolSpecs);
        // base_output_directory belongs inside job_spec. Placing it on the
        // top-level body made every scheduled attempt fail with INVALID_ARGUMENT.
        jobSpec.put("base_output_directory", baseOutputDirectory);

        JSONObject jobBody = new JSONObject();
        jobBody.put("display_name", trainingJobName);
        jobBody.put("job_spec", jobSpec);
        return jobBody.toString();
    }

    /**
     * Creates the scheduler job under the configured project and location.
     *
     * @param args unused
     * @throws IOException   if the Cloud Scheduler client cannot be created
     * @throws JSONException if the request body cannot be assembled
     */
    public static void main( String[] args ) throws IOException, JSONException
    {
        System.out.println(String.format("=======STARTING APPLICATION, version %s =======", "v5"));

        String parent = LocationName.of(projectId, location).toString();

        Map<String, String> headers = new HashMap<String, String>();
        headers.put("User-Agent", "Google-Cloud-Scheduler");
        headers.put("Content-Type", "application/json; charset=utf-8");

        OAuthToken token = OAuthToken.newBuilder()
                .setServiceAccountEmail(serviceAccountEmail)
                .setScope(scope)
                .build();

        HttpTarget httpTarget = HttpTarget.newBuilder()
                .setUri(httpTargetUri)
                .setHttpMethod(HttpMethod.POST)
                .putAllHeaders(headers)
                .setBody(ByteString.copyFromUtf8(getJobBody()))
                .setOauthToken(token)
                .build();

        Job job = Job.newBuilder()
                .setName(schedulerJobName)
                .setDescription("test java job")
                .setSchedule("* * * * *")
                .setTimeZone("Africa/Abidjan")
                .setHttpTarget(httpTarget)
                .build();

        // try-with-resources releases the client even when createJob throws.
        try (CloudSchedulerClient client = CloudSchedulerClient.create()) {
            client.createJob(parent, job);
        }
    }
}

Python Client code:

from google.cloud import scheduler
import json


# --- Deployment-specific settings ("..." values are placeholders) ---
project_id = "..."
location = "..."
service_account_email = "[email protected]"
output_uri_prefix = "gs://.../.../"
image_uri = f"{location}-docker.pkg.dev/{project_id}/.../..."

training_job_name = "custom_training_job"
parent = f"projects/{project_id}/locations/{location}"
scheduler_job_name = f"{parent}/jobs/python_job"
scope = "https://www.googleapis.com/auth/cloud-platform"
http_target_uri = (
    f"https://{location}-aiplatform.googleapis.com/v1/{parent}/customJobs"
)
machine_type = "n1-standard-4"
replica_count = 1

# --- Body POSTed to the customJobs endpoint on every scheduled run ---
worker_pool_spec = {
    "machine_spec": {"machine_type": machine_type},
    "replica_count": replica_count,
    "container_spec": {
        "image_uri": image_uri,
        "args": ["--msg=hello!"],
    },
}

# base_output_directory sits inside job_spec, next to worker_pool_specs.
custom_job_body = {
    "display_name": training_job_name,
    "job_spec": {
        "worker_pool_specs": [worker_pool_spec],
        "base_output_directory": {"output_uri_prefix": output_uri_prefix},
    },
}

# --- Cloud Scheduler job definition ---
request_headers = {
    "User-Agent": "Google-Cloud-Scheduler",
    "Content-Type": "application/json; charset=utf-8",
}

oauth_token = {
    "service_account_email": service_account_email,
    "scope": scope,
}

http_target = {
    "uri": http_target_uri,
    "http_method": "POST",
    "headers": request_headers,
    # The body field takes bytes, so the JSON text is UTF-8 encoded.
    "body": json.dumps(custom_job_body).encode("utf-8"),
    "oauth_token": oauth_token,
}

job = {
    "name": scheduler_job_name,
    "description": "Created from Python client",
    "http_target": http_target,
    "schedule": "* * * * *",
    "time_zone": "Africa/Abidjan",
}

# --- Create the job under the project/location parent ---
client = scheduler.CloudSchedulerClient()
response = client.create_job(parent=parent, job=job)

EDIT

The problem was that in the getJobBody function, I was setting base_output_directory as a top-level field, whereas it should be nested inside job_spec. The problem is solved, but is there a better way to do this? I know there is a CustomJobSpec class, but I could not find a way to convert it into a JSON string.

Upvotes: 0

Views: 774

Answers (1)

racerX
racerX

Reputation: 1092

As mentioned in the edit, the problem was that in the getJobBody function, base_output_directory was being set as a top-level field, whereas it should be nested inside job_spec. As far as I know, the only way to avoid this mistake at the moment is to build the job body carefully; I don't know of a way to do this in a more structured manner.

Upvotes: 1

Related Questions