from openvino.runtime import Core
import numpy as np

# Initialize the OpenVINO runtime Core
ie = Core()

# Load and compile the model for the CPU device
compiled_model = ie.compile_model(model='../ovc_output/converted_model.xml', device_name="CPU")

# Prepare input: random token ids (not real tokenizer output), just for demonstration.
# int64 is the dtype language models typically expect for token ids.
input_ids = np.random.randint(0, 50256, (1, 10)).astype(np.int64)

# Create a dictionary mapping input names to data, as expected by the model
inputs = {"input_ids": input_ids}

# Create an infer request and run synchronous inference
result = compiled_model.create_infer_request().infer(inputs=inputs)

# Access the output tensor via the compiled model's first output port.
# Indexing by a hard-coded string key only works if the model's output
# tensor actually carries that name, so the port is the safer lookup.
output = result[compiled_model.output(0)]
print("Inference results:", output)
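
# A minimal follow-up sketch, assuming the converted model is a GPT-2-style
# language model (the 50256 upper bound on the random token ids matches
# GPT-2's vocabulary size) that returns logits of shape
# (batch, sequence_length, vocab_size). Under that assumption, the most
# likely next token can be read off the logits at the last position:
logits = output
next_token_id = int(np.argmax(logits[0, -1]))  # highest-scoring token after the last input position
print("Predicted next token id:", next_token_id)

# Note: for a one-shot call, the compiled model is itself callable and an
# explicit infer request is not needed:
#     result = compiled_model(inputs)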