# Saturday, October 12, 2024
import requests
import re
from collections import Counter
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
def clean_text(text):
    """Normalize *text* into a list of lowercase, stopword-free word tokens.

    The input is lowercased, every character other than a-z and whitespace
    is stripped out, the remainder is tokenized with NLTK, and English
    stopwords are removed.
    """
    # Lowercase first so the character filter below only needs a-z.
    normalized = re.sub(r'[^a-z\s]', '', text.lower())
    tokens = word_tokenize(normalized)
    # NOTE: requires the NLTK 'punkt' and 'stopwords' data to be downloaded.
    english_stops = set(stopwords.words('english'))
    return [token for token in tokens if token not in english_stops]
def generate_tags(text, max_tags=10):
    """Return up to *max_tags* tags for *text* (e.g. a YouTube title,
    description, or blog post body).

    Tags are simply the most frequent tokens left after cleaning and
    stopword removal, ordered from most to least common.
    """
    frequency = Counter(clean_text(text))
    return [word for word, _ in frequency.most_common(max_tags)]
# Function to fetch content from a Blogger post using Blogger API
def fetch_blog_content(blog_id, post_id, api_key, timeout=10):
    """Fetch a Blogger post's title and content via the Blogger v3 API.

    Parameters
    ----------
    blog_id, post_id : str
        Identifiers of the blog and the post to fetch.
    api_key : str
        Google API key authorized for the Blogger API.
    timeout : float, optional
        Seconds to wait for the HTTP response (default 10). Without a
        timeout, ``requests.get`` can block indefinitely.

    Returns
    -------
    tuple
        ``(title, content)`` on success, ``(None, None)`` on any failure
        (non-200 status or network error).
    """
    # Blogger API endpoint
    url = f"https://www.googleapis.com/blogger/v3/blogs/{blog_id}/posts/{post_id}?key={api_key}"
    try:
        # Fetch post content; bound the wait so a dead network cannot hang us.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Map network-level failures (DNS, connection, timeout) onto the
        # same (None, None) failure contract as a bad status code.
        print(f"Error: Unable to fetch post content ({exc})")
        return None, None
    if response.status_code == 200:
        post_data = response.json()
        title = post_data['title']
        content = post_data['content']
        return title, content
    print(f"Error: Unable to fetch post content (Status Code: {response.status_code})")
    return None, None
# Example usage
def main():
    """Fetch one Blogger post and print auto-generated tags for it."""
    # Replace with your Blogger API details
    blog_id = 'YOUR_BLOG_ID'
    post_id = 'YOUR_POST_ID'
    api_key = 'YOUR_GOOGLE_API_KEY'
    # Fetch the blog post content
    title, content = fetch_blog_content(blog_id, post_id, api_key)
    if not (title and content):
        print("Failed to retrieve blog content.")
        return
    # Generate tags from the blog post title and content combined
    tags = generate_tags(f"{title} {content}")
    print("Generated Tags:", tags)


if __name__ == "__main__":
    main()
# Post a Comment