I have succeeded in automatically scraping the post title from a website URL entered on the WordPress "Add Post" screen. I also want to scrape the ID and date values, but the elements that contain them use the same markup (the same tag and class), so I cannot tell them apart.
custom-admin-script.js
I want to scrape this data and display it directly in the code (ID) and date metaboxes that I have created.
PHP:
<?php
/**
 * Find the value cell that follows a labelled header cell in a scraped table.
 *
 * Looks for markup of the form
 * `<td class="header">ID:</td><td class="text">Value ID</td>` and returns the
 * trimmed text of the first `<td>` that follows the matching header cell.
 * The comparison ignores case, surrounding whitespace and a trailing colon.
 *
 * @param DOMDocument $dom   Parsed page.
 * @param string      $label Header label to look for, e.g. 'ID' or 'Date:'.
 * @return string|null The value text, or null when no matching row exists.
 */
function _scrape_find_table_value($dom, $label) {
    $wanted = rtrim(trim($label), ':');
    foreach ($dom->getElementsByTagName('td') as $cell) {
        // The class attribute may hold several classes; match 'header' as a whole word.
        $classes = preg_split('/\s+/', trim($cell->getAttribute('class')));
        if (!in_array('header', $classes, true)) {
            continue;
        }
        if (strcasecmp(rtrim(trim($cell->textContent), ':'), $wanted) !== 0) {
            continue;
        }
        // Skip whitespace text nodes between the header cell and its value cell.
        for ($sibling = $cell->nextSibling; $sibling !== null; $sibling = $sibling->nextSibling) {
            if ($sibling instanceof DOMElement && strtolower($sibling->nodeName) === 'td') {
                return trim($sibling->textContent);
            }
        }
    }
    return null;
}

/**
 * Scrape a single piece of data from the page at the given URL.
 *
 * The page is downloaded to a temporary file (which is always removed again),
 * parsed with DOMDocument, and the requested field is extracted.
 *
 * @param string $url   Page to fetch.
 * @param string $field What to extract. 'title' (the default) returns the text of
 *                      the first <a> inside the first <h3>. Any other value is
 *                      treated as a table header label ('ID', 'Date', ...) and the
 *                      text of the cell next to that header is returned.
 * @return string The extracted text, or a human-readable message when the request
 *                fails or the element cannot be found. Failure messages start with
 *                'Failed to retrieve data from the specified URL.' or end with
 *                'not found on the page.'.
 */
function scrape_data_from_url($url, $field = 'title') {
    // Generate a unique temporary file to stream the response into
    $temp_file = tempnam(sys_get_temp_dir(), 'scrape_');
    if ($temp_file === false) {
        return 'Failed to retrieve data from the specified URL. Error: could not create a temporary file';
    }

    // Send a GET request to the URL and save the response to the temporary file
    $response = wp_remote_get($url, array('timeout' => 30, 'stream' => true, 'filename' => $temp_file));
    $succeeded = !is_wp_error($response)
        && isset($response['response']['code'])
        && (int) $response['response']['code'] === 200;
    $html = $succeeded ? file_get_contents($temp_file) : false;

    // The temporary file is only a transport buffer; remove it on every path so
    // repeated scrapes do not fill the temp directory.
    if (is_file($temp_file)) {
        unlink($temp_file);
    }

    if (!$succeeded) {
        if (is_wp_error($response)) {
            $error_message = $response->get_error_message();
        } elseif (isset($response['response']['code'])) {
            $error_message = 'Unexpected HTTP status code ' . $response['response']['code'];
        } else {
            $error_message = 'Unknown error';
        }
        return "Failed to retrieve data from the specified URL. Error: $error_message";
    }

    // DOMDocument::loadHTML() rejects an empty string, so bail out before parsing.
    if ($html === false || trim($html) === '') {
        return 'Failed to retrieve data from the specified URL. Error: empty response body';
    }

    // Parse the HTML, silencing libxml warnings about real-world markup and
    // restoring the caller's libxml error mode afterwards.
    $dom = new DOMDocument();
    $previous_error_mode = libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_error_mode);

    if ($field !== 'title') {
        $value = _scrape_find_table_value($dom, $field);
        return $value !== null ? $value : 'Field "' . $field . '" not found on the page.';
    }

    // Extract the title: the first link inside the first <h3>
    $title_element = $dom->getElementsByTagName('h3')->item(0);
    if ($title_element) {
        $link = $title_element->getElementsByTagName('a')->item(0);
        if ($link) {
            return trim($link->nodeValue);
        }
    }

    // If the title element (or the link inside it) is not found
    return 'Title element not found on the page.';
}
/**
 * save_post callback: replace the post title with the title scraped from the
 * URL stored in the '_custom_url' post meta.
 *
 * Does nothing during autosaves, for post types other than 'post', when no URL
 * is stored, or when scraping fails.
 *
 * @param int $post_id ID of the post being saved.
 * @return void
 */
function update_post_title_from_scraped_data($post_id) {
    // Skip autosaves: the constant is only meaningful when it is defined AND true.
    if (defined('DOING_AUTOSAVE') && DOING_AUTOSAVE) {
        return;
    }
    if (get_post_type($post_id) !== 'post') {
        return;
    }

    // Get the URL from the custom metabox
    $url = get_post_meta($post_id, '_custom_url', true);
    if (!$url) {
        return;
    }

    // Scrape data from the URL
    $scraped_title = scrape_data_from_url($url);

    // scrape_data_from_url() reports failures as (truthy) message strings, so
    // recognise them explicitly instead of saving an error message as the title.
    $scrape_failed = !$scraped_title
        || $scraped_title === 'Title element not found on the page.'
        || strpos($scraped_title, 'Failed to retrieve data from the specified URL.') === 0;
    if ($scrape_failed) {
        // Log instead of echo: output during save_post breaks the redirect headers.
        error_log('Scraping failed: ' . $scraped_title);
        return;
    }

    // wp_update_post() fires save_post again; unhook this callback while
    // updating so it does not call itself recursively, then restore it.
    remove_action('save_post', 'update_post_title_from_scraped_data');
    wp_update_post(array('ID' => $post_id, 'post_title' => $scraped_title));
    add_action('save_post', 'update_post_title_from_scraped_data');
}
/**
 * Register the "Website URL" metabox on the edit screen of the 'post' post type.
 *
 * The box is rendered by render_custom_url_metabox() and is requested in the
 * 'normal' context with 'high' priority.
 */
function add_custom_url_metabox() {
    $box_id       = 'custom_url_metabox';
    $box_title    = 'Website URL';
    $render_with  = 'render_custom_url_metabox';
    $screen       = 'post';

    add_meta_box($box_id, $box_title, $render_with, $screen, 'normal', 'high');
}
/**
 * Render the "Website URL" metabox: a nonce field plus a text input that is
 * pre-filled with the URL stored in the '_custom_url' post meta.
 *
 * @param WP_Post $post Post being edited.
 * @return void
 */
function render_custom_url_metabox($post) {
    // Retrieve the current URL value
    $url_value = get_post_meta($post->ID, '_custom_url', true);
    // Emitted so save_custom_url_metabox() can verify the submission came from this form.
    wp_nonce_field('save_custom_url_metabox', 'custom_url_nonce');
    ?>
<label for="custom_url">Enter Website URL:</label>
<input type="text" id="custom_url" name="custom_url" value="<?php echo esc_attr($url_value); ?>" style="width: 100%;" />
    <?php
}

/**
 * save_post callback: store the submitted URL in the '_custom_url' post meta.
 *
 * The value is only saved when the metabox field and its nonce were submitted,
 * the nonce is valid, the request is not an autosave, and the current user may
 * edit the post.
 *
 * @param int $post_id ID of the post being saved.
 * @return void
 */
function save_custom_url_metabox($post_id) {
    if (!isset($_POST['custom_url'], $_POST['custom_url_nonce'])) {
        return;
    }
    $nonce = sanitize_text_field(wp_unslash($_POST['custom_url_nonce']));
    if (!wp_verify_nonce($nonce, 'save_custom_url_metabox')) {
        return;
    }
    if (defined('DOING_AUTOSAVE') && DOING_AUTOSAVE) {
        return;
    }
    if (!current_user_can('edit_post', $post_id)) {
        return;
    }

    // esc_url_raw() (rather than sanitize_text_field) so only a well-formed URL
    // is stored; this also matches how the AJAX handler treats the same input.
    $url = esc_url_raw(wp_unslash($_POST['custom_url']));
    update_post_meta($post_id, '_custom_url', $url);
}
// Register the metabox on the post edit screen.
add_action('add_meta_boxes', 'add_custom_url_metabox');
// Both callbacks below run on save_post with the default priority. The URL is
// registered first so that the title updater reads the freshly saved
// '_custom_url' value (assumes same-priority callbacks run in registration
// order -- keep these two lines in this order).
add_action('save_post', 'save_custom_url_metabox');
add_action('save_post', 'update_post_title_from_scraped_data');
/**
 * Enqueue the theme's admin helper script (custom-admin-script.js), declaring
 * jQuery as its dependency.
 *
 * The script is registered under the handle 'custom-admin-script', with no
 * explicit version, and with the final wp_enqueue_script() argument set to true.
 */
function custom_admin_scripts() {
    $script_path  = '/assets/js/custom-admin-script.js';
    $script_url   = get_template_directory_uri() . $script_path;
    $dependencies = array('jquery');

    wp_enqueue_script('custom-admin-script', $script_url, $dependencies, null, true);
}
// Run the enqueue on admin page loads.
add_action('admin_enqueue_scripts', 'custom_admin_scripts');
/**
 * AJAX handler (action 'get_title_from_url'): scrape the title of the page at
 * the posted 'url' and print it as the plain-text response body.
 *
 * Requests from users who cannot edit posts are rejected with HTTP 403, because
 * the handler makes the server fetch a caller-supplied URL. A missing or
 * invalid URL produces an empty response.
 *
 * @return void Always terminates the request via wp_die().
 */
function get_title_from_url() {
    // The wp_ajax_ hook only guarantees a logged-in user; require an editing
    // capability before fetching arbitrary URLs on the caller's behalf.
    if (!current_user_can('edit_posts')) {
        wp_die(-1, 403);
    }

    $url = isset($_POST['url']) ? esc_url_raw(wp_unslash($_POST['url'])) : '';
    if ($url !== '') {
        // Fetch title data using the scraping function.
        // Printed as-is: the admin script assigns the response with .val(),
        // so HTML-escaping here would put entities into the title field.
        echo scrape_data_from_url($url);
    }
    wp_die();
}
add_action('wp_ajax_get_title_from_url', 'get_title_from_url');
custom-admin-script.js
JavaScript:
jQuery(document).ready(function($) {
    // Delay (ms) after the last keystroke before the URL is sent to the server.
    var DEBOUNCE_MS = 500;
    var debounceTimer = null;
    // The request currently in flight, if any; aborted when a newer one starts
    // so a slow, stale response can never overwrite a newer title.
    var pendingRequest = null;

    /**
     * Fetch the title for the given URL via the 'get_title_from_url' AJAX
     * action and write the response into the post title input.
     *
     * @param {string} url Page URL to scrape.
     */
    function getTitleFromURL(url) {
        if (pendingRequest) {
            pendingRequest.abort();
        }
        var request = $.ajax({
            type: 'POST',
            url: ajaxurl,
            data: {
                action: 'get_title_from_url',
                url: url
            },
            success: function(response) {
                // Updating the title input value on the admin page
                $('#title').val(response);
            },
            complete: function(jqXHR) {
                // Only clear the slot if it still refers to this request.
                if (pendingRequest === jqXHR) {
                    pendingRequest = null;
                }
            }
        });
        pendingRequest = request;
    }

    // Listening for changes in the URL input. Debounced so that typing a URL
    // sends one request when the user pauses instead of one per keystroke.
    $('#custom_url').on('input', function() {
        var url = String($(this).val() || '').trim();
        clearTimeout(debounceTimer);
        if (url === '') {
            // Nothing to scrape; do not send an empty request.
            return;
        }
        debounceTimer = setTimeout(function() {
            getTitleFromURL(url);
        }, DEBOUNCE_MS);
    });
});
I want to scrape the data below and display it directly in the code (ID) and date metaboxes that I have created.
HTML:
<tr>
<td class="header">ID:</td>
<td class="text">Value ID</td>
</tr>
<tr>
<td class="header">Date:</td>
<td class="text">Value Date</td>
</tr>