import csv # Library to handle CSV files
import json # Library to handle JSON data
import os # Access to environment variables (used for credentials)
import re # Regular expressions library for pattern matching
import xml.etree.ElementTree as ET # Library to parse XML data

import openai # OpenAI API library for interacting with GPT models
import requests # HTTP client for the WordPress REST API and sitemap requests
from requests.auth import HTTPBasicAuth # For HTTP Basic Authentication

# --- Configuration -----------------------------------------------------------
# Secrets are read from the environment so they never live in source control.
# Any credential that has ever been committed to a file must be treated as
# leaked and rotated.

WEBSITE = "https://www.kantspel.se"  # Base URL of the website

USERNAME = os.environ.get("WP_USERNAME", "Lucas")  # WordPress username for authentication

PASSWORD = os.environ.get("WP_APP_PASSWORD", "")  # WordPress application password (secret)

SITEMAP_URL = WEBSITE + "/post-sitemap.xml"  # URL to the website's post sitemap

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")  # OpenAI API key (secret)

AUTH = HTTPBasicAuth(USERNAME, PASSWORD)  # Authentication object using HTTP Basic Auth

def read_csv_file(file_path):
    """Read a CSV file and return its rows as dictionaries.

    The first row of the file is used as the header; every following row
    becomes one dictionary keyed by those header names.

    :param file_path: Path to the CSV file.
    :return: List of dictionaries containing CSV data (empty if the file
        has no data rows).
    :raises OSError: If the file cannot be opened.
    """
    # newline='' is required by the csv module so that line breaks embedded in
    # quoted fields are preserved; the explicit encoding keeps parsing
    # independent of the platform's locale default.
    with open(file_path, mode='r', newline='', encoding='utf-8') as file:
        return list(csv.DictReader(file))

def get_post_content(post_id, timeout=30):
    """Fetch the rendered HTML content of a WordPress post by its ID.

    :param post_id: ID of the post to fetch.
    :param timeout: Seconds to wait for the server before giving up.
    :return: Rendered HTML content of the post, or None if the request
        fails or the response does not have the expected shape.
    """
    url = f"{WEBSITE}/wp-json/wp/v2/posts/{post_id}"  # Construct API endpoint URL

    try:
        # Without a timeout, requests can block forever on a stalled server.
        response = requests.get(url, auth=AUTH, timeout=timeout)
        response.raise_for_status()  # Raise exception for HTTP errors
        return response.json()['content']['rendered']
    except requests.RequestException as e:
        print(f"Error fetching post content: {e}")
    except (KeyError, TypeError, ValueError) as e:
        # Body was not JSON or lacked content.rendered.
        print(f"Error fetching post content: unexpected response format ({e!r})")

    return None  # Return None on failure

def get_post_title(post_id, timeout=30):
    """Retrieve the rendered title of a WordPress post by its ID.

    :param post_id: ID of the post.
    :param timeout: Seconds to wait for the server before giving up.
    :return: Title of the post, or None if the request fails or the
        response does not have the expected shape.
    """
    url = f"{WEBSITE}/wp-json/wp/v2/posts/{post_id}"  # Construct API endpoint URL

    try:
        # Without a timeout, requests can block forever on a stalled server.
        response = requests.get(url, auth=AUTH, timeout=timeout)
        response.raise_for_status()  # Raise exception for HTTP errors
        return response.json()['title']['rendered']
    except requests.RequestException as e:
        print(f"Error fetching post title: {e}")
    except (KeyError, TypeError, ValueError) as e:
        # Body was not JSON or lacked title.rendered.
        print(f"Error fetching post title: unexpected response format ({e!r})")

    return None  # Return None on failure

def get_posts_from_sitemap(current_post_url, timeout=30):
    """Return post URLs listed in the sitemap, minus the current post.

    The current post is filtered out first (trailing slashes are ignored in
    the comparison); the first two of the remaining entries are then skipped.

    :param current_post_url: URL of the current post to exclude.
    :param timeout: Seconds to wait for the server before giving up.
    :return: List of post URLs; empty if the sitemap cannot be fetched or
        parsed.
    """
    namespaces = {'ns': 'http://www.sitemaps.org/schemas/sitemap/0.9'}  # Sitemap XML namespace
    current_post_url = current_post_url.rstrip('/')  # Normalize for comparison

    try:
        response = requests.get(SITEMAP_URL, timeout=timeout)  # Fetch the sitemap XML
        response.raise_for_status()  # Raise exception for HTTP errors
        sitemap = ET.fromstring(response.content)  # Parse XML content
    except requests.RequestException as e:
        print(f"Error fetching posts from sitemap: {e}")
        return []
    except ET.ParseError as e:
        # A malformed sitemap is treated like a failed fetch.
        print(f"Error fetching posts from sitemap: invalid XML ({e})")
        return []

    urls = []
    for loc in sitemap.findall('ns:url/ns:loc', namespaces):
        # An empty <loc/> has text None; pretty-printed sitemaps may pad the
        # URL with whitespace.
        url = (loc.text or '').strip()
        if url and url.rstrip('/') != current_post_url:
            urls.append(url)

    return urls[2:]  # Skip the first two remaining entries

def extract_keywords_and_intent(text):
    """Extract keywords and intent from a text using OpenAI's chat API.

    The model is asked for a JSON object; any prose or code-fence markup
    around that object in the reply is ignored.

    :param text: The text to analyze.
    :return: Dictionary parsed from the model's JSON reply (expected to
        contain 'keywords' and 'intent'). On any failure the fallback
        ``{"keywords": [], "intent": ""}`` is returned.
    """
    prompt = (
        "Extract the main keywords and the intent of the following text. "
        "Return the result in JSON format with two fields: 'keywords' (a list of relevant keywords) "
        "and 'intent' (a brief description of the main intent).\n\n"
        f"Text: {text}"
    )

    openai.api_key = OPENAI_API_KEY  # Ensure this is correctly set

    messages = [
        {"role": "system", "content": "You are a knowledgeable assistant skilled in text analysis."},
        {"role": "user", "content": prompt},
    ]

    # Defined up front so the JSON error handler can always report it.
    response_content = ""

    try:
        response = openai.ChatCompletion.create(
            model="gpt-4o-mini",  # Use the appropriate model
            messages=messages,
        )

        # `choices` is a list; the reply lives in its first element.
        response_content = response.choices[0].message['content'].strip()

        first_brace = response_content.find('{')
        if first_brace == -1:
            raise ValueError("No JSON object found in the response.")

        # raw_decode parses one JSON value and stops there, so trailing text
        # after the object (explanations, closing code fences) is tolerated.
        data, _ = json.JSONDecoder().raw_decode(response_content[first_brace:])
        return data
    except json.JSONDecodeError as e:
        print(f"JSON decoding error in extract_keywords_and_intent: {e}. Response Content: {response_content}")
        return {"keywords": [], "intent": ""}
    except Exception as e:
        # Best-effort boundary: API, network and shape errors all degrade to
        # the empty result so the caller can continue.
        print(f"Error extracting keywords and intent: {e}")
        return {"keywords": [], "intent": ""}

def ask_openai(prompt):
    """Send a prompt to OpenAI's ChatCompletion API and return the reply text.

    :param prompt: The prompt string to send to OpenAI.
    :return: The content of the first choice in OpenAI's response, or None
        if an error occurs.
    """
    openai.api_key = OPENAI_API_KEY  # Set the OpenAI API key

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},  # System message defining assistant behavior
        {"role": "user", "content": prompt},  # User's prompt
    ]

    try:
        response = openai.ChatCompletion.create(
            model="gpt-4o-mini",  # Specify the model to use
            messages=messages,  # Pass the messages list
            max_tokens=4096,  # Maximum number of tokens in the response
            temperature=0.3,  # Controls the randomness of the response
        )
        # `choices` is a list; the reply lives in its first element.
        return response.choices[0].message['content']
    except Exception as e:
        # Best-effort boundary: any API/network/shape error yields None.
        print(f"Error in OpenAI request: {e}")
        return None

def _parse_rating(value):
    """Return *value* as a float, or None when it is not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def _select_relevant_urls(article_ratings, min_rating):
    """Return the URLs of rating entries whose rating is at least *min_rating*."""
    if not isinstance(article_ratings, list):
        return []

    selected = []
    for entry in article_ratings:
        if not isinstance(entry, dict):
            continue  # The model may emit stray non-object items
        url = entry.get('url')
        rating = _parse_rating(entry.get('relevance_rating', 0))
        if url and rating is not None and rating >= min_rating:
            selected.append(url)
    return selected


def lister(post_id, post_url, min_rating=8):
    """Find sitemap posts that OpenAI rates as highly relevant to a post.

    Fetches the post's title and content, collects candidate URLs from the
    sitemap, extracts the post's keywords and intent, fills the prompt
    template from ``internal_link_finder.txt`` and asks OpenAI to rate each
    candidate. URLs rated at least ``min_rating`` are returned.

    :param post_id: ID of the post being processed.
    :param post_url: URL of the post being processed.
    :param min_rating: Minimum relevance rating a URL needs to be returned.
    :return: Tuple ``(urls, post_content)``. ``urls`` is empty when OpenAI
        gives no usable answer; the tuple is ``([], None)`` when the post
        content cannot be fetched.
    :raises OSError: If ``internal_link_finder.txt`` cannot be read.
    """
    print(f"Processing in lister - Post ID: {post_id}, Post URL: {post_url}")  # Debugging information

    post_title = get_post_title(post_id)  # Retrieve the post title
    post_content = get_post_content(post_id)  # Retrieve the post content

    if not post_content:
        print(f"No content found for Post ID {post_id}. Skipping.")
        return [], None

    print(f"Post Content for ID {post_id}: {post_content[:100]}...")  # First 100 characters for debugging

    post_urls = get_posts_from_sitemap(post_url)  # Candidate post URLs from the sitemap
    print(f"URLs from Sitemap for Post ID {post_id}: {post_urls}")  # Debugging information

    with open('internal_link_finder.txt', 'r', encoding='utf-8') as file:
        custom_prompt_template = file.read()  # Read the custom prompt template from a file

    # Extract keywords and intent from the current post. The values come from
    # a model reply, so normalize them to strings before using str.replace.
    analysis = extract_keywords_and_intent(post_content)
    if not isinstance(analysis, dict):
        analysis = {}
    keywords = analysis.get('keywords') or []
    if isinstance(keywords, str):
        keywords = [keywords]
    post_keywords = ', '.join(str(keyword) for keyword in keywords)
    post_intent = str(analysis.get('intent') or '')

    # Replace placeholders in the prompt template with actual post data.
    # The title fetch can fail independently of the content fetch, and
    # str.replace rejects None, so fall back to an empty title.
    custom_prompt = custom_prompt_template.replace("[POST_TITLE]", post_title or "")
    custom_prompt = custom_prompt.replace("[POST_CONTENT]", post_content)
    custom_prompt = custom_prompt.replace("[POST_KEYWORDS]", post_keywords)
    custom_prompt = custom_prompt.replace("[POST_INTENT]", post_intent)
    custom_prompt = custom_prompt.replace("[POST_URLS]", ', '.join(post_urls))

    # Append instructions to enforce JSON response format
    custom_prompt += (
        "\n\nPlease provide the response in strictly valid JSON format as follows:\n"
        "{\n"
        "  \"article_ratings\": [\n"
        "    {\"url\": \"URL1\", \"relevance_rating\": 8, \"keywords\": [\"keyword1\", \"keyword2\"], \"intent\": \"intent1\"},\n"
        "    {\"url\": \"URL2\", \"relevance_rating\": 9, \"keywords\": [\"keyword3\", \"keyword4\"], \"intent\": \"intent2\"},\n"
        "    {\"url\": \"URL3\", \"relevance_rating\": 7, \"keywords\": [\"keyword5\", \"keyword6\"], \"intent\": \"intent3\"}\n"
        "  ]\n"
        "}"
    )

    openai_response = ask_openai(custom_prompt)  # Send the prompt to OpenAI and get the response

    if not openai_response:
        print(f"OpenAI did not return a valid response for Post ID {post_id}.")
        return [], post_content

    # Use regex to extract JSON content from the OpenAI response: greedy match
    # from the first '{' to the last '}' so surrounding prose is dropped.
    json_pattern = re.compile(r'\{.*\}', re.DOTALL)
    match = json_pattern.search(openai_response)

    if not match:
        print(f"No JSON found in OpenAI response for Post ID {post_id}. Response: {openai_response}")
        return [], post_content

    json_string = match.group()  # Extract the matched JSON string

    try:
        response_json = json.loads(json_string)  # Parse the JSON string into a Python dictionary
    except json.JSONDecodeError as e:
        print(f"JSON decoding error for Post ID {post_id}: {e}. JSON string: {json_string}")
        return [], post_content

    # Keep only URLs whose relevance rating meets the threshold
    url_list = _select_relevant_urls(response_json.get('article_ratings', []), min_rating)

    # Debugging information
    print(f"Custom Prompt: {custom_prompt}")
    print(f"OpenAI Response: {openai_response}")
    print(f"Final URL List: {url_list}")

    return url_list, post_content  # Return the list of relevant URLs and the post content