import glob
import os
from collections import Counter

import matplotlib.pyplot as plt
import nltk
from nltk.tokenize import word_tokenize
# Folder containing the '*.txt' files to analyse
folder_path = 'text_pythonENG'

# List of manually selected keywords. Tokens are compared by exact string
# equality, so matching is case-sensitive and both capitalisations are listed.
selected_keywords = ['flowres', 'flowers', 'courses', 'termes', 'terms', 'purgations', 'Flowres', 'Flowers', 'Courses', 'Termes', 'Terms', 'Purgations', 'Menses', 'Catamenia']

# Folder where the bar charts are saved
output_folder = 'barcharts_menENG'


def count_keywords(words, keywords):
    """Count exact-match occurrences of each keyword in a token sequence.

    Args:
        words: Iterable of string tokens (e.g. the output of word_tokenize).
        keywords: Iterable of keyword strings to look up.

    Returns:
        A dict mapping every keyword to its number of occurrences in
        ``words`` (0 when absent), in the same order as ``keywords``.
    """
    # One pass over the tokens instead of one full scan per keyword.
    frequencies = Counter(words)
    return {keyword: frequencies[keyword] for keyword in keywords}


if __name__ == "__main__":
    # Download the necessary resource for tokenization.
    # NOTE(review): newer NLTK releases may load 'punkt_tab' instead of
    # 'punkt' for word_tokenize -- confirm against the installed version.
    nltk.download('punkt')

    # Step 1: Read the text files from the folder.
    # Sorted so that texts are processed (and charts shown) in a stable order.
    text_files = sorted(glob.glob(os.path.join(folder_path, '*.txt')))

    # Step 2: Tokenize each text into words and count keyword occurrences.
    # text_keyword_occurrences[keyword][i] is the count for text_files[i].
    text_keyword_occurrences = {keyword: [] for keyword in selected_keywords}
    for file_path in text_files:
        with open(file_path, 'r', encoding='utf-8') as file:
            text = file.read()
        words = word_tokenize(text)
        keyword_counts = count_keywords(words, selected_keywords)
        for keyword in selected_keywords:
            text_keyword_occurrences[keyword].append(keyword_counts[keyword])

    # Step 3: Create and save a bar chart for each individual text.
    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(output_folder, exist_ok=True)
    for text_index, file_path in enumerate(text_files):
        text_name = os.path.basename(file_path)
        plt.figure(figsize=(10, 6))
        for keyword in selected_keywords:
            plt.bar(keyword, text_keyword_occurrences[keyword][text_index], label=keyword)
        plt.xlabel('Keywords : Menstruation')
        plt.ylabel('Occurrences')
        plt.title(f'Keyword Occurrences in Text: {text_name}')
        plt.legend()
        plt.xticks(rotation=45)
        plt.tight_layout()

        # Save the bar chart as an image
        chart_file_name = os.path.splitext(text_name)[0] + '_chart.png'
        chart_file_path = os.path.join(output_folder, chart_file_name)
        plt.savefig(chart_file_path)

        # Display the bar chart (optional)
        plt.show()
        # Release the figure so memory does not grow with the number of texts.
        plt.close()
# Console output captured from a previous run:
# [nltk_data] Downloading package punkt to
# [nltk_data]     C:\Users\falle\AppData\Roaming\nltk_data...
# [nltk_data]   Package punkt is already up-to-date!