# This block is intended for use in a Google Colab environment. It mounts
# Google Drive and gives access to the files stored there.
#
# It defines a working directory within Google Drive and switches the
# current directory to that location if it exists.
#
# Note:
# If you are running this project in a local environment, outside of
# Google Colab, or accessing data through other means, this block
# is not required and can be removed or adapted accordingly.

import os
from google.colab import drive

# Workspace folder inside Google Drive that holds the project files
drive_folder_path = '/content/drive/My Drive/ai_in_european_public_administration'

# Mount Google Drive at /content/drive
drive.mount('/content/drive', force_remount=True)

# Record the working directory before switching to the workspace folder.
# Drive is already mounted at this point, so the label refers to the
# directory change rather than to the mount.
current_directory = os.getcwd()
print(f"Current working directory before changing directory: {current_directory}")

# os.chdir() only accepts directories, so test for a directory explicitly
# instead of for mere existence of the path.
if os.path.isdir(drive_folder_path):
    os.chdir(drive_folder_path)
    print(f"Changed to directory: {drive_folder_path}")
else:
    print(f"Directory does not exist: {drive_folder_path}")
    print("Please check the folder path.")

# Verify the working directory after the (attempted) change
new_directory = os.getcwd()
print(f"Current working directory after changing directory: {new_directory}")

Mounted at /content/drive
Current working directory before mounting: /content
Changed to directory: /content/drive/My Drive/ai_in_european_public_administration
Current working directory after mounting: /content/drive/My Drive/ai_in_european_public_administration

# This block prints the current Python version used in the environment.
#
# In this case, the environment is running:
# - Python 3.12.13
#
# This information is useful for reproducibility and debugging, ensuring
# compatibility across different systems and environments.

import sys

# Report the full interpreter version string of the running environment
python_version = sys.version
print(python_version)

3.12.13 (main, Mar  4 2026, 09:23:07) [GCC 11.4.0]

# This block installs the project dependencies from the 'requirements.txt' file.
#
# Although Google Colab already includes many commonly used libraries
# (such as pandas and plotly), this step ensures that the exact versions
# specified in the project are installed.
#
# This is especially useful for reproducibility and portability.

!pip install -r requirements.txt

Requirement already satisfied: pandas==2.2.2 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 1)) (2.2.2)
Requirement already satisfied: plotly==5.24.1 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 2)) (5.24.1)
Requirement already satisfied: numpy>=1.26.0 in /usr/local/lib/python3.12/dist-packages (from pandas==2.2.2->-r requirements.txt (line 1)) (2.0.2)
Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/dist-packages (from pandas==2.2.2->-r requirements.txt (line 1)) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas==2.2.2->-r requirements.txt (line 1)) (2025.2)
Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas==2.2.2->-r requirements.txt (line 1)) (2025.3)
Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.12/dist-packages (from plotly==5.24.1->-r requirements.txt (line 2)) (9.1.4)
Requirement already satisfied: packaging in /usr/local/lib/python3.12/dist-packages (from plotly==5.24.1->-r requirements.txt (line 2)) (26.0)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.8.2->pandas==2.2.2->-r requirements.txt (line 1)) (1.17.0)

# This block displays the currently installed versions of key libraries
# used in the project, allowing you to verify that the environment
# matches the versions specified in requirements.txt.
#
# In this case, the expected versions are:
# - pandas 2.2.2
# - plotly 5.24.1

!pip show pandas
!pip show plotly

Name: pandas
Version: 2.2.2
Summary: Powerful data structures for data analysis, time series, and statistics
Home-page: https://pandas.pydata.org
Author: 
Author-email: The Pandas Development Team <pandas-dev@python.org>
License: BSD 3-Clause License

Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
All rights reserved.

Copyright (c) 2011-2023, Open source contributors.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
  contributors may be used to endorse or promote products derived from
  this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Location: /usr/local/lib/python3.12/dist-packages
Requires: numpy, python-dateutil, pytz, tzdata
Required-by: access, arviz, bigframes, bigquery-magics, bokeh, bqplot, cmdstanpy, cufflinks, datasets, db-dtypes, dopamine_rl, esda, fastai, geemap, geopandas, google-colab, gradio, gspread-dataframe, holoviews, inequality, libpysal, mapclassify, mizani, mlxtend, momepy, pandas-datareader, pandas-gbq, panel, plotnine, pointpats, prophet, pymc, pysal, seaborn, segregation, shap, sklearn-pandas, spaghetti, spopt, spreg, statsmodels, tensorflow_decision_forests, tobler, tsfresh, vega-datasets, xarray, yfinance
Name: plotly
Version: 5.24.1
Summary: An open-source, interactive data visualization library for Python
Home-page: https://plotly.com/python/
Author: Chris P
Author-email: chris@plot.ly
License: MIT
Location: /usr/local/lib/python3.12/dist-packages
Requires: packaging, tenacity
Required-by: cufflinks, geemap

# This block imports the core libraries used for data analysis and
# visualization in this project.
#
# Libraries included:
# - pandas: for data manipulation and analysis
# - plotly.express: for interactive data visualizations
# - plotly.io: for renderer configuration

import pandas as pd
import plotly.express as px
import plotly.io as pio

# Plotly requires different renderers depending on the execution environment.
# Uncomment the line that matches your current environment
# and comment out the other one.
#
#   - "colab"    : Google Colab (interactive figures in Colab output cells)
#   - "notebook" : VS Code / Jupyter / Local, or when downloading
#                  the .ipynb file from Google Colab and running
#                  it in other local environments

# Select the Plotly renderer; enable the "colab" line instead when running
# inside Google Colab.
# pio.renderers.default = "colab"  # Google Colab
selected_renderer = "notebook"  # VS Code / Jupyter / Local
pio.renderers.default = selected_renderer

# Load the dataset from the CSV file (path is relative to the working directory)
dataset_file = "pstw_dataset.csv"
df = pd.read_csv(dataset_file)

# Quick look at the first records
df.head()

# Print the distinct values of both year columns.
#
# NOTE: the recorded output shows 'End Year' holding strings, NaN and a
# non-breaking space ('\xa0'), so it needs cleaning before any numeric use.
for year_column in ("Start Year", "End Year"):
    print(f"Unique values in '{year_column}'")
    print(df[year_column].unique())

Unique values ​​in 'Start Year'
[2020 2019 2018 2014 2017 2016 2021 2012 2015 2011 2010 2022 2023 2013
 2025 2007 2024 2003 2008 2026]
Unique values ​​in 'End Year'
[nan '2014' '2021' '2020' '2017' '2024' '2018' '2019' '2023' '2015' '2022'
 '2016' '2025' '2013' '2026' '\xa0' '2028' '2030' '2027' '2029']

# Inspect the column names to get an overview of the data available in
# the dataset.

df.columns

Index(['PSTW ID', 'Name', 'Website', 'Description', 'Geographical extent',
       'Geographical coverage (country)', 'NUTS 2021',
       'Responsible organisation', 'Responsible organisation category',
       'Functions of Government (COFOG level I)',
       'Functions of Government (COFOG level II)', ' Status', 'Start Year',
       'End Year', 'Process type', 'Application type', 'Cross Border',
       'Cross Sector', 'Primary Technology', 'Secondary Technology',
       'Interaction', 'Improved Public Service', 'Personalized Services',
       'Public (citizen)-centered services',
       'Increase quality of PSI and services',
       'More responsive, efficient, and cost-effective public services',
       'New services or channels', 'Improved Administrative Efficiency',
       'Cost-reduction', 'Responsiveness of government operation',
       'Improved management of public resources',
       'Increased quality of processes and systems',
       'Better collaboration and better communication',
       'Reduced or eliminated the risk of corruption and abuse of the law by public servants',
       'Enabled greater fairness, honesty, equality',
       'Open government capabilities',
       'Increased transparency of public sector operations',
       'Increased public participation in government actions and policy making',
       'Improved public control and influence on government actions and policies',
       'Source', 'AI Classification (I)',
       'AI Classification Subdomain (II) (main)',
       'AI Classification Subdomain (II) (Other I)',
       'AI Classification Subdomain (II) (Other II)',
       'AI Classification Subdomain (II) (Other III)', 'AI Keywords',
       'Collaboration type', 'Procurement (EU Model Contractual AI)',
       'Funding source', 'GovTech', 'Company name or GovTech ID',
       'GovTech Initiative + ID', 'Open components in input',
       'Open components in output', 'Stories link', 'Date updated'],
      dtype='object')

# Basic data cleaning on the 'Primary Technology' column.
#
# This step removes leading and trailing spaces and converts all text
# to lowercase. Doing so standardizes the values, making it easier
# to recognize and categorize technologies consistently in later steps.

# Normalize 'Primary Technology': trim surrounding whitespace, then lowercase
raw_primary_tech = df["Primary Technology"]
df["Primary Technology"] = raw_primary_tech.str.strip().str.lower()

# Build a summary table for 'Primary Technology':
# - Count: occurrences of each technology
# - Percentage (%): share of each technology among the counted rows,
#   rounded to two decimal places
tech_counts = df["Primary Technology"].value_counts()
tech_shares = df["Primary Technology"].value_counts(normalize=True).mul(100).round(2)

tech_summary = pd.DataFrame({
    "Count": tech_counts,
    "Percentage (%)": tech_shares,
})

tech_summary

# Create a donut chart to visually represent the distribution of primary technologies.
#
# The chart uses the percentages calculated in 'tech_summary' to show the relative
# share of each technology.

# Title-case the technology names for display only; tech_summary itself
# is left unchanged.
pie_source = tech_summary.rename(index=str.title)
pie_labels = tech_summary.index.map(str.title)

fig = px.pie(
    pie_source,
    values="Percentage (%)",
    names=pie_labels,
    title="Distribution of Primary Technologies in the Dataset (%)",
    hole=0.4,
)

# Show each slice's share with two decimals
fig.update_traces(texttemplate="%{percent:.2%}")

# Legend placement, theme and title position
tech_legend = {
    "title": "Primary Technology",
    "x": 0.75,
    "y": 0.5,
    "xanchor": "left",
    "yanchor": "middle",
}
fig.update_layout(
    legend=tech_legend,
    template="plotly_white",
    title_x=0.5,
    title_y=0.95,
)

fig.show()

# Keep only the records whose 'Primary Technology' contains
# "artificial intelligence" (case-insensitive; missing values count as no match).
is_ai_record = df["Primary Technology"].str.contains(
    "artificial intelligence", case=False, na=False
)
ai_df = df[is_ai_record].copy()

# Standardize 'Geographical coverage (country)':
# - trim surrounding whitespace
# - title-case every word
# - map the known inconsistency "Monte Carlo" to "Monaco"
cleaned_countries = (
    ai_df["Geographical coverage (country)"].str.strip().str.title()
)
ai_df["Geographical coverage (country)"] = cleaned_countries.replace(
    {"Monte Carlo": "Monaco"}
)

# Number of AI records per country
ai_by_country = ai_df["Geographical coverage (country)"].value_counts()

# Display the counts
ai_by_country

# Sum of the per-country counts
print("\nTotal:", ai_by_country.sum())

Total: 1805

# Create a bar chart showing the top 7 countries with the highest number of
# AI cases in public administrations. This visualization helps identify
# trends and patterns in AI adoption across Europe.
#
# The x-axis represents countries, the y-axis shows the number of AI cases.

# Take the seven most frequent countries once and reuse them for both axes
top_countries = ai_by_country.head(7)
axis_titles = {"x": "Country", "y": "Number of AI Cases"}

fig = px.bar(
    top_countries,
    # Plain list / array inputs keep the generic "x" / "y" field names that
    # the axis_titles mapping above is keyed on.
    x=list(top_countries.index),
    y=top_countries.to_numpy(),
    labels=axis_titles,
    title="Top 7 Countries by Number of Recorded AI Cases",
)

fig.update_layout(title_x=0.5, title_y=0.95)

fig.show()

# Identify all unique European countries present in the AI dataset.
#
# This allows for later classification of each country into its
# corresponding European region, supporting regional analysis
# and comparisons of AI adoption trends.

# Drop missing values first: sorted() cannot order NaN (a float) against
# strings and would raise TypeError if any country were missing.
sorted(ai_df["Geographical coverage (country)"].dropna().unique())

['Albania',
 'Austria',
 'Belgium',
 'Bosnia And Herzegovina',
 'Bulgaria',
 'Croatia',
 'Cyprus',
 'Czechia',
 'Denmark',
 'Estonia',
 'European Union',
 'Finland',
 'France',
 'Germany',
 'Greece',
 'Hungary',
 'Iceland',
 'Ireland',
 'Italy',
 'Latvia',
 'Lithuania',
 'Luxembourg',
 'Malta',
 'Moldova',
 'Monaco',
 'Netherlands',
 'North Macedonia',
 'Norway',
 'Poland',
 'Portugal',
 'Romania',
 'Serbia',
 'Slovakia',
 'Slovenia',
 'Spain',
 'Sweden',
 'Switzerland',
 'Türkiye',
 'Ukraine',
 'United Kingdom',
 'Vatican City']

# Classify European countries into regions.
#
# The 'region_map' dictionary groups each country into one of the following:
# - Northern Europe
# - Western Europe
# - Southern Europe
# - Eastern Europe
# - European Union
#
# I then create a new column 'European Region' in the AI dataset using this map.
# This makes it easier to perform regional analysis and compare AI adoption trends
# across different parts of Europe.
#
# Finally, I do a quick check of the new classification by displaying
# the first few rows of the relevant columns.

# Countries grouped by the region they are assigned to in this analysis.
# Country names match the title-cased values of
# 'Geographical coverage (country)'.
countries_by_region = {
    "Northern Europe": [
        "Denmark",
        "Finland",
        "Iceland",
        "Ireland",
        "Norway",
        "Sweden",
        "United Kingdom",
        "Estonia",
        "Latvia",
        "Lithuania",
    ],
    "Western Europe": [
        "Austria",
        "Belgium",
        "France",
        "Germany",
        "Luxembourg",
        "Netherlands",
        "Switzerland",
        "Monaco",
    ],
    "Southern Europe": [
        "Italy",
        "Spain",
        "Portugal",
        "Greece",
        "Croatia",
        "Cyprus",
        "Malta",
        "Albania",
        "Bosnia And Herzegovina",
        "Serbia",
        "North Macedonia",
        "Vatican City",
        "Slovenia",
    ],
    "Eastern Europe": [
        "Poland",
        "Czechia",
        "Slovakia",
        "Hungary",
        "Romania",
        "Bulgaria",
        "Moldova",
        "Ukraine",
        "Türkiye",
    ],
    "European Union": ["European Union"],
}

# Flat country -> region lookup used by Series.map below
region_map = {
    country: region
    for region, countries in countries_by_region.items()
    for country in countries
}

ai_df["European Region"] = ai_df["Geographical coverage (country)"].map(region_map)

# Series.map yields NaN for any country missing from region_map, and later
# value_counts() calls drop those rows silently. Report them explicitly.
missing_region = ai_df["European Region"].isna()
unmapped_countries = sorted(
    set(ai_df.loc[missing_region, "Geographical coverage (country)"].dropna())
)
if unmapped_countries:
    print(f"Countries without an assigned region: {unmapped_countries}")

# Quick check of the new classification
ai_df[["Geographical coverage (country)", "European Region"]].head()

# Count the number of AI records per European region.
#
# This allows observation of which region has the highest number of cases.
#
# Additionally, the percentage of each region relative to the total is calculated.

# Per-region record counts and their share of all region-assigned records
region_counts = ai_df["European Region"].value_counts()
region_shares = ai_df["European Region"].value_counts(normalize=True).mul(100).round(2)

ai_region_summary = pd.DataFrame({
    "Count": region_counts,
    "Percentage (%)": region_shares,
})

ai_region_summary

# Create a donut chart to visualize the distribution of AI cases by European region.
#
# The chart uses the percentages calculated in 'ai_region_summary' to show
# the relative share of each region.

# Donut chart of the regional shares computed in ai_region_summary
fig = px.pie(
    ai_region_summary,
    names=ai_region_summary.index,
    values="Percentage (%)",
    hole=0.4,
    title="Distribution of AI Cases by European Region (%)",
)

# Percent labels with two decimals, drawn inside the slices
fig.update_traces(textposition="inside", texttemplate="%{percent:.2%}")

# Legend placement, theme and title position
region_legend = {
    "title": "European Region",
    "x": 0.65,
    "y": 0.5,
    "xanchor": "left",
    "yanchor": "middle",
}
fig.update_layout(
    template="plotly_white",
    legend=region_legend,
    title_x=0.5,
    title_y=0.95,
)

fig.show()

# Standardize the sector names in 'Functions of Government (COFOG level I)'.
#
# This step ensures consistency by:
# - Removing leading and trailing spaces
# - Converting each word to title case (first letter capitalized)

# Trim whitespace and title-case the COFOG Level I sector names
raw_sectors = ai_df["Functions of Government (COFOG level I)"]
ai_df["Functions of Government (COFOG level I)"] = raw_sectors.str.strip().str.title()

# Count and percentage of AI cases per COFOG Level I sector
sector_counts = ai_df["Functions of Government (COFOG level I)"].value_counts()
sector_shares = (
    ai_df["Functions of Government (COFOG level I)"]
    .value_counts(normalize=True)
    .mul(100)
    .round(2)
)

sector_summary = pd.DataFrame({
    "Count": sector_counts,
    "Percentage (%)": sector_shares,
})

sector_summary

# Create a donut chart to visualize the distribution of AI cases by government sector
# (COFOG Level I).
#
# The chart shows the relative share of AI adoption across different sectors
# in European public administration.

# Donut chart of the sector shares computed in sector_summary
fig = px.pie(
    sector_summary,
    names=sector_summary.index,
    values="Percentage (%)",
    hole=0.4,
    title="Distribution of AI Cases by COFOG Level I Sector (%)",
)

# Percent labels with two decimals, drawn inside the slices
fig.update_traces(textposition="inside", texttemplate="%{percent:.2%}")

# Legend placement, theme and title position
sector_legend = {
    "title": "Government Sector (COFOG Level I)",
    "x": 0.65,
    "y": 0.5,
    "xanchor": "left",
    "yanchor": "middle",
}
fig.update_layout(
    template="plotly_white",
    legend=sector_legend,
    title_x=0.5,
    title_y=0.95,
)

fig.show()

# Get all unique government sectors from the COFOG Level I classification.
#
# Sorting them alphabetically makes it easier to review the categories
# and check for any inconsistencies or duplicates.

# Drop missing values before collecting the distinct sectors: sorted()
# cannot order NaN (a float) against strings and would raise TypeError.
unique_sectors = (
    ai_df["Functions of Government (COFOG level I)"].dropna().unique()
)

sorted(unique_sectors)

['Economic Affairs',
 'Education',
 'Environmental Protection',
 'General Public Services',
 'Health',
 'Housing And Community Amenities',
 'Public Order And Safety',
 'Recreation, Culture And Religion',
 'Social Protection']

# In this step, AI use cases classified under COFOG Level I sectors are grouped
# into two broader analytical categories. The goal is to distinguish whether
# AI adoption in European governments is primarily oriented toward improving
# citizen-facing public services or toward optimizing internal administrative
# and governmental operations.
#
# This classification enables a higher-level interpretation of how AI is being
# integrated within the public sector and supports further analysis of the
# balance between service delivery and administrative efficiency.

# COFOG sectors grouped as internal government administration,
# regulatory functions, and state management activities
internal_administration = [
    "General Public Services",
    "Economic Affairs",
    "Environmental Protection",
]

# COFOG sectors grouped as services delivered directly to citizens,
# such as healthcare, education, and public safety
citizen_services = [
    "Education",
    "Health",
    "Social Protection",
    "Housing And Community Amenities",
    "Recreation, Culture And Religion",
    "Public Order And Safety",
]


def classify_sector(sector):
    """Return the analytical group a COFOG Level I sector belongs to.

    Args:
        sector: Sector name, compared by exact match against the
            ``citizen_services`` and ``internal_administration`` lists.

    Returns:
        "Citizen Services" or "Internal Administration" when the sector is
        listed in the corresponding group, otherwise "Other".
    """
    # Citizen-facing sectors are checked first, as in the group order below
    groups = (
        ("Citizen Services", citizen_services),
        ("Internal Administration", internal_administration),
    )
    for group_label, group_members in groups:
        if sector in group_members:
            return group_label
    return "Other"

# Label every record with its government AI application type by passing
# each COFOG Level I value through classify_sector
cofog_sectors = ai_df["Functions of Government (COFOG level I)"]
ai_df["Government AI Application Type"] = cofog_sectors.map(classify_sector)

# Show the first rows to verify the new classification column
ai_df.head()

# Count and percentage of AI cases by type of government application
# This summary shows whether AI is more concentrated in citizen-facing
# services or in internal administrative functions.

# Count and percentage of AI cases per government application type
application_counts = ai_df["Government AI Application Type"].value_counts()
application_shares = (
    ai_df["Government AI Application Type"]
    .value_counts(normalize=True)
    .mul(100)
    .round(2)
)

ai_application_summary = pd.DataFrame({
    "Count": application_counts,
    "Percentage (%)": application_shares,
})

ai_application_summary

# Create a donut chart to visualize the distribution of AI application types
# in government.
#
# The chart shows how AI is divided across different use cases, such as
# citizen-facing services and internal administrative functions. Percentages
# are displayed inside each segment to make comparisons easier.

# Donut chart of the application-type shares in ai_application_summary
fig = px.pie(
    ai_application_summary,
    names=ai_application_summary.index,
    values="Percentage (%)",
    hole=0.4,
    title="Distribution of AI Cases by Government AI Application Type (%)",
)

# Percent labels with two decimals, drawn inside the slices
fig.update_traces(textposition="inside", texttemplate="%{percent:.2%}")

# One layout call sets the legend (title and placement), theme and title
# position together.
application_legend = {
    "title": "Government AI Application Type",
    "x": 0.65,
    "y": 0.5,
    "xanchor": "left",
    "yanchor": "middle",
}
fig.update_layout(
    template="plotly_white",
    legend=application_legend,
    title_x=0.5,
    title_y=0.95,
)

fig.show()

# Standardize the 'AI Classification (I)' column.
#
# This step cleans the text by removing leading and trailing spaces
# and converting values to title case. This helps ensure consistency
# when grouping and analyzing different AI capabilities.

# Trim whitespace and title-case 'AI Classification (I)'.
# Title-casing also lowercases the second letter of acronyms, which is why
# the recorded output lists 'Ai Services'.
raw_ai_classes = ai_df["AI Classification (I)"]
ai_df["AI Classification (I)"] = raw_ai_classes.str.strip().str.title()

# List all distinct AI categories
unique_ai_types = ai_df["AI Classification (I)"].unique()
print("Unique AI Types:", unique_ai_types)

Unique AI Types: ['Learning' 'Communication' 'Reasoning' 'Perception' 'Planning'
 'Integration And Interaction' 'Services' 'Ethics And Philosophy'
 'Ai Services']

# Count how many records fall into each AI classification and calculate
# their corresponding percentages.
#
# This gives a clearer view of how different AI capabilities are distributed
# across the dataset.
#
# The results are then combined into a single summary DataFrame.

# Records per AI classification, and each classification's share in percent
# (two decimals)
ai_types_count = ai_df["AI Classification (I)"].value_counts()
ai_types_percentage = (
    ai_df["AI Classification (I)"]
    .value_counts(normalize=True)
    .mul(100)
    .round(2)
)

# Combine both measures into one summary table
ai_types_summary = ai_types_count.to_frame(name="Count").assign(
    **{"Percentage (%)": ai_types_percentage}
)
ai_types_summary

# Create a donut chart to show how different AI capabilities are distributed
# across the dataset.

# Display labels: restore the acronym casing of "AI Services"
capability_labels = [
    "AI Services" if label == "Ai Services" else label
    for label in ai_types_summary.index
]

# Pull slices below 5 % slightly out of the donut
slice_pull = [
    0.05 if share < 5 else 0
    for share in ai_types_summary["Percentage (%)"]
]

fig = px.pie(
    ai_types_summary,
    names=capability_labels,
    values="Percentage (%)",
    hole=0.4,
    title="Distribution of AI Cases by AI Capability (%)",
)

fig.update_traces(
    textposition="outside",
    texttemplate="%{label}: %{percent:.2%}",
    pull=slice_pull,
    marker={"line": {"color": "white", "width": 1}},
)

# Legend placement, theme and title position
capability_legend = {
    "title": "AI Classification",
    "x": 0.95,
    "y": 0.5,
    "xanchor": "left",
    "yanchor": "middle",
}
fig.update_layout(
    template="plotly_white",
    legend=capability_legend,
    title_x=0.5,
    title_y=0.95,
)

fig.show()

# Create a cross-tabulation between government sectors (COFOG Level I)
# and AI classifications.

# Case counts: rows are COFOG Level I sectors, columns are AI classifications
sector_ai = pd.crosstab(
    index=ai_df["Functions of Government (COFOG level I)"],
    columns=ai_df["AI Classification (I)"],
)

sector_ai

# Convert the counts to row percentages: each value is the share of a
# sector's cases that falls under the given AI classification, rounded to
# two decimals.
cases_per_sector = sector_ai.sum(axis=1)
sector_ai_percentage = sector_ai.div(cases_per_sector, axis=0).mul(100).round(2)

sector_ai_percentage

# Create a heatmap to show how different AI types are distributed across
# government sectors (COFOG Level I).
#
# Each cell represents the percentage of AI cases of a certain type within
# a specific sector, making it easy to spot patterns or concentrations
# of AI adoption.

# Tick labels for the x-axis: restore the acronym casing of "AI Services"
capability_tick_text = [
    "AI Services" if column == "Ai Services" else column
    for column in sector_ai_percentage.columns
]
capability_tick_positions = list(range(len(sector_ai_percentage.columns)))

fig = px.imshow(
    sector_ai_percentage,
    labels={
        "x": "AI Classification",
        "y": "Government Sector (COFOG Level I)",
        "color": "Percentage (%)",
    },
    color_continuous_scale="Blues",
    text_auto=".2f",
    width=1000,
    height=800,
)

# Title, x-axis (labels below the map, rotated) and y-axis styling
heatmap_title = {
    "text": "Distribution of AI Capabilities Across Government Sectors (%), normalized by sector",
    "x": 0.5,
    "xanchor": "center",
    "yanchor": "top",
    "pad": {"t": 100},
    "font": {"size": 18},
}
heatmap_xaxis = {
    "side": "bottom",
    "title": "AI Classification",
    "title_standoff": 10,
    "tickangle": -30,
    "tickfont": {"size": 12},
    "tickvals": capability_tick_positions,
    "ticktext": capability_tick_text,
}
fig.update_layout(
    template="plotly_white",
    title=heatmap_title,
    xaxis=heatmap_xaxis,
    yaxis={"tickfont": {"size": 12}},
)

fig.show()

# Group the impact-related columns into three broad categories for analysis.
#
# 1. Service Impact:
#    Columns that measure improvements in public services delivered to citizens,
#    such as better quality, more personalized services, or new service channels.
service_impact_cols = [
    "Improved Public Service",
    "Personalized Services",
    "Public (citizen)-centered services",
    "Increase quality of PSI and services",
    "More responsive, efficient, and cost-effective public services",
    "New services or channels",
]

# 2. Administrative Impact:
#    Columns about the government's internal efficiency, e.g. resource
#    management, cost reduction, or collaboration.
administrative_impact_cols = [
    "Improved Administrative Efficiency",
    "Cost-reduction",
    "Responsiveness of government operation",
    "Improved management of public resources",
    "Increased quality of processes and systems",
    "Better collaboration and better communication",
]

# 3. Governance Impact:
#    Columns about transparency, integrity, fairness, and citizen
#    participation.
governance_impact_cols = [
    "Reduced or eliminated the risk of corruption and abuse of the law by public servants",
    "Enabled greater fairness, honesty, equality",
    "Open government capabilities",
    "Increased transparency of public sector operations",
    "Increased public participation in government actions and policy making",
    "Improved public control and influence on government actions and policies",
]

# Every impact column, in category order: service, administrative, governance
impact_cols = [
    *service_impact_cols,
    *administrative_impact_cols,
    *governance_impact_cols,
]

# Data types of the selected impact columns
ai_df.loc[:, impact_cols].dtypes

# First rows of the impact columns, for a visual check of the raw values
ai_df.loc[:, impact_cols].head()

# For every impact column, count the cells equal to "x" (the marker for a
# reported impact) and rank the columns from most to least frequent.
reported_mask = ai_df[impact_cols].eq("x")
result = (
    reported_mask.sum()
    .sort_values(ascending=False)
    .rename("Number of Reported Impacts")
)
result

# Count how many AI impacts are reported in each broad category:
# - Service Impact
# - Administrative Impact
# - Governance Impact
#
# This gives a quick overview of which type of impact is most commonly
# reported in the dataset, showing where AI is having the largest effect
# in public administration.

# Total number of "x" markers across all columns of each category
service_total = ai_df[service_impact_cols].eq("x").sum().sum()
administrative_total = ai_df[administrative_impact_cols].eq("x").sum().sum()
governance_total = ai_df[governance_impact_cols].eq("x").sum().sum()

# One entry per category, largest total first
impact_category_totals = pd.Series(
    [service_total, administrative_total, governance_total],
    index=["Service Impact", "Administrative Impact", "Governance Impact"],
).sort_values(ascending=False)

impact_category_totals

# Bar chart showing the total number of reported AI impacts per impact category
# (Service, Administrative, Governance) in public administration cases.

# Turn the totals into a two-column table for plotting. A dedicated name is
# used so the loaded dataset `df` is not overwritten by this small table.
impact_totals_df = (
    impact_category_totals
    .rename_axis("Impact Category")
    .reset_index(name="Number of Reported Impacts")
)

fig = px.bar(
    impact_totals_df,
    x="Impact Category",
    y="Number of Reported Impacts",
    color="Impact Category",
    color_discrete_sequence=["#4C72B0", "#55A868", "#C44E52"],
    title="Total Reported Impacts by Category",
)

# The bars are already labelled on the x-axis, so the legend is hidden
fig.update_layout(
    xaxis_title="Impact Category",
    yaxis_title="Number of Reported Impacts",
    showlegend=False,
    template="plotly_white",
    width=600,
    height=500,
)

fig.show()

# Convert the "x" markers to binary values: 1 where the cell equals "x",
# 0 otherwise (including missing values)
ai_df["Improved_Public_Service_binary"] = ai_df["Improved Public Service"].eq("x").astype(int)

# Cross-tabulate government sector (COFOG level I) against the binary flag
# to count, per sector, how many AI cases did / did not report
# "Improved Public Service".
#
# The columns are reindexed to [0, 1] before being labelled, so the table
# always has both columns even if one value never occurs. Assigning two
# labels to a one-column table would otherwise raise a ValueError.
impact_by_sector = pd.crosstab(
    ai_df["Functions of Government (COFOG level I)"],
    ai_df["Improved_Public_Service_binary"]
).reindex(columns=[0, 1], fill_value=0)

impact_by_sector.columns = ["Not Improved", "Improved"]

impact_by_sector

# Same table as row proportions (normalize="index"), multiplied by 100 to
# express them as percentages that are easy to compare across sectors.

impact_sector_percentage = pd.crosstab(
    ai_df["Functions of Government (COFOG level I)"],
    ai_df["Improved_Public_Service_binary"],
    normalize="index"
).reindex(columns=[0, 1], fill_value=0.0) * 100

impact_sector_percentage.columns = ["Not Improved (%)", "Improved (%)"]

impact_sector_percentage

# Heatmap of the sector-level percentages computed above.
#
# Rows are government sectors (COFOG level I) and columns are the two outcomes
# ("Not Improved (%)" / "Improved (%)"); each cell is annotated with its value
# to two decimals, so sectors with a high share of "Improved Public Service"
# cases stand out at a glance.

fig = px.imshow(
    impact_sector_percentage,
    labels={
        "x": "Impact",
        "y": "Government Sector (COFOG level I)",
        "color": "Percentage (%)",
    },
    color_continuous_scale="Blues",
    text_auto=".2f",
    width=600,
    height=600,
)

fig.update_layout(
    template="plotly_white",
    title={
        "text": "Percentage of AI Cases Reporting Improved Public Service by COFOG Sector (%)",
        "x": 0.5,
        "xanchor": "center",
        "yanchor": "top",
        "pad": {"t": 100},
        "font": {"size": 14},
    },
    xaxis={
        "side": "bottom",
        "title": "Impact",
        "title_standoff": 10,
        "tickangle": -30,
        "tickfont": {"size": 12},
    },
    yaxis={
        "tickfont": {"size": 12},
    },
)

fig.show()

# Preview the first 20 entries of the 'GovTech' column.
ai_df["GovTech"].iloc[:20]

# List the distinct values found in 'GovTech' (missing entries included)
# to identify the categories it contains.
pd.unique(ai_df["GovTech"])

array([nan, 'No', 'Yes'], dtype=object)

# Summarize GovTech participation across AI projects.
#
# - govtech_counts: number of projects per 'GovTech' value, keeping the
#   missing (NaN) entries as their own category.
# - govtech_percentage: each category's share of all projects, in percent
#   (unrounded).
# - govtech_summary: both measures side by side, with the percentage rounded
#   to two decimals for readability.

govtech_counts = ai_df["GovTech"].value_counts(dropna=False)
govtech_percentage = govtech_counts.div(govtech_counts.sum()).mul(100)

govtech_summary = govtech_counts.to_frame(name="Count")
govtech_summary["Percentage (%)"] = govtech_percentage.round(2)

govtech_summary

# Donut chart of GovTech participation in public sector AI projects.

# Move the 'GovTech' categories out of the index into a regular column
# so they can be referenced by name when plotting.
govtech_summary = govtech_summary.reset_index()

# Missing values are given the explicit label "NaN" for plotting only;
# govtech_summary itself keeps its real missing values.
fig = px.pie(
    data_frame=govtech_summary.fillna({"GovTech": "NaN"}),
    names="GovTech",
    values="Percentage (%)",
    hole=0.4,
    title="GovTech Participation in Public Sector AI Projects (%)",
)

# Print each slice's share inside the slice, with two decimals.
fig.update_traces(
    textposition="inside",
    texttemplate="%{percent:.2%}",
)

fig.update_layout(
    template="plotly_white",
    title_x=0.5,
    title_y=0.95,
    legend_title="GovTech Participation",
    legend={
        "x": 0.65,
        "y": 0.5,
        "xanchor": "left",
        "yanchor": "middle",
    },
)

fig.show()

# Normalize the labels in the 'Responsible organisation category' column so
# that spelling variants of the same category are grouped together:
#   1. trim leading/trailing whitespace,
#   2. replace hyphens with spaces,
#   3. capitalize every word.
# Hyphens and spaces are both word separators for title-casing, so steps 2 and
# 3 give the same result in either order (e.g. "Central-Government" becomes
# "Central Government").

ai_df["Responsible organisation category"] = (
    ai_df["Responsible organisation category"]
    .str.strip()
    .str.replace("-", " ", regex=False)
    .str.title()
)

# Build a summary table of responsible organisation categories:
# the number of AI projects per category and each category's share of the
# total (in percent, rounded to two decimals). Missing categories are not
# counted.

org_summary = (
    ai_df["Responsible organisation category"]
    .value_counts()
    .to_frame(name="Count")
)

# Derive the percentages from the counts already computed above.
org_summary["Percentage (%)"] = (
    org_summary["Count"] / org_summary["Count"].sum() * 100
).round(2)

org_summary

# Donut chart of how AI cases are distributed across responsible
# organisation categories.

fig = px.pie(
    data_frame=org_summary,
    names=org_summary.index,
    values="Percentage (%)",
    hole=0.4,
    title="Distribution of AI Cases by Responsible Organisation Category (%)",
)

# Label every slice outside the ring with its category and share, pull all
# slices slightly apart, and outline them in white for separation.
fig.update_traces(
    textposition="outside",
    texttemplate="%{label}<br>%{percent:.2%}",
    pull=[0.03 for _ in org_summary.index],
    marker={"line": {"color": "white", "width": 1}},
)

fig.update_layout(
    template="plotly_white",
    title_x=0.5,
    title_y=0.95,
    legend_title="Organisation Category",
    legend={
        "x": 0.95,
        "y": 0.5,
        "xanchor": "left",
        "yanchor": "middle",
    },
)

fig.show()

	Count	Percentage (%)
Primary Technology
artificial intelligence	1805	78.79
blockchain	346	15.10
ar/vr	43	1.88
digital twins	37	1.62
5g (edge computing)	18	0.79
quantum computing	17	0.74
other	13	0.57
virtual worlds	12	0.52

	Count	Percentage (%)
Functions of Government (COFOG level I)
General Public Services	573	31.75
Economic Affairs	325	18.01
Public Order And Safety	260	14.40
Health	205	11.36
Social Protection	147	8.14
Environmental Protection	123	6.81
Education	66	3.66
Housing And Community Amenities	57	3.16
Recreation, Culture And Religion	49	2.71

	Count	Percentage (%)
AI Classification (I)
Learning	634	35.12
Perception	304	16.84
Communication	276	15.29
Planning	247	13.68
Reasoning	185	10.25
Integration And Interaction	99	5.48
Services	53	2.94
Ethics And Philosophy	6	0.33
Ai Services	1	0.06

AI Classification (I)	Ai Services	Communication	Ethics And Philosophy	Integration And Interaction	Learning	Perception	Planning	Reasoning	Services
Functions of Government (COFOG level I)
Economic Affairs	0.00	6.15	0.00	12.00	29.23	18.15	21.23	8.62	4.62
Education	0.00	9.09	0.00	0.00	48.48	10.61	10.61	13.64	7.58
Environmental Protection	0.00	4.07	0.81	5.69	27.64	36.59	16.26	8.13	0.81
General Public Services	0.17	28.27	0.87	2.97	40.84	6.81	6.81	9.42	3.84
Health	0.00	10.73	0.00	7.80	38.05	16.10	16.59	8.29	2.44
Housing And Community Amenities	0.00	5.26	0.00	3.51	31.58	15.79	22.81	17.54	3.51
Public Order And Safety	0.00	7.31	0.00	3.46	24.62	37.31	15.38	11.92	0.00
Recreation, Culture And Religion	0.00	22.45	0.00	4.08	40.82	20.41	6.12	4.08	2.04
Social Protection	0.00	19.05	0.00	4.76	40.14	3.40	14.97	16.33	1.36

	Not Improved (%)	Improved (%)
Functions of Government (COFOG level I)
Economic Affairs	41.538462	58.461538
Education	39.393939	60.606061
Environmental Protection	77.235772	22.764228
General Public Services	41.884817	58.115183
Health	35.121951	64.878049
Housing And Community Amenities	50.877193	49.122807
Public Order And Safety	83.461538	16.538462
Recreation, Culture And Religion	32.653061	67.346939
Social Protection	38.775510	61.224490

Analysis Notebook: Artificial Intelligence in European Public Administration: A Data-Driven Analysis of Adoption, Capabilities, and Impact

Abstract

Note - Project Download

1. Data Loading and Environment Configuration

2. Initial Exploration of Technology Usage in the Dataset (Primary Technology Distribution)

3. Distribution of AI Cases Across European Countries and Regions

4. AI Adoption Across Government Functions: A COFOG-Based Analysis

5. Distribution of AI Capabilities and Sectoral Deployment in European Public Administration

6. AI Impacts in the Public Sector: Evidence on Service Delivery, Administrative Efficiency, and Governance

7. GovTech Participation & Responsible Organisations in Public Sector AI Projects

Conclusion

	PSTW ID	Name	Website	Description	Geographical extent	Geographical coverage (country)	NUTS 2021	Responsible organisation	Responsible organisation category	Functions of Government (COFOG level I)	...	Collaboration type	Procurement (EU Model Contractual AI)	Funding source	GovTech	Company name or GovTech ID	GovTech Initiative + ID	Open components in input	Open components in output	Stories link	Date updated
0	PSTW-1	AMS - public empolyment service	https://www.frontiersin.org/articles/10.3389/f...	As of 2020, the Public Employment Service Aust...	National	Austria	AT	Public Employment Service Austria	Central-Government	Social protection	...	NaN	NaN	National-funded project	NaN	NaN	NaN	NaN	NaN	NaN	13/03/2025
1	PSTW-2	Mona - Public chatbot for companies on the sub...	https://chat.oesterreich.gv.at/	The new chatbot "Mona" is intended to cover al...	National	Austria	AT	Unternehmensservice Portal, Austria	Central-Government	General public services	...	NaN	NaN	National-funded project	NaN	NaN	NaN	NaN	NaN	NaN	12/03/2025
2	PSTW-3	CitizenLab - Youth for Climate	https://community.youth4climate.info/homepage	Collecting input was just the easy part: in or...	National	Belgium	BE	CitizenLab, Belgium	Private sector	Environmental protection	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	20/09/2024
3	PSTW-4	Walloon - Agricultural subsidy monitoring with...	https://inspire.ec.europa.eu/sites/default/fil...	Walloon uses geo AI with satellite imagery to ...	Regional	Belgium	BE2	Département de l'Agriculture	Local Government	Economic affairs	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	20/09/2024
4	PSTW-5	AcPaas - Technical procurement documents compa...	https://repository.vlerick.com/server/api/core...	The city of Antwerp has a platform called Antw...	Local	Belgium	BE21	Digipolis & city of Antwerp	Private sector	General public services	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	27/08/2025

	count
Geographical coverage (country)
Germany	220
Italy	201
Netherlands	169
United Kingdom	135
Spain	128
Denmark	87
Finland	87
France	80
Belgium	79
Estonia	77
Portugal	74
Norway	65
European Union	42
Greece	40
Sweden	35
Poland	34
Austria	28
Luxembourg	28
Lithuania	19
Slovenia	17
Czechia	16
Latvia	15
Ireland	15
Switzerland	14
Romania	14
Ukraine	12
Hungary	12
Bulgaria	10
Croatia	10
Albania	9
Cyprus	7
Slovakia	6
Malta	5
Serbia	5
Iceland	3
North Macedonia	2
Moldova	1
Vatican City	1
Bosnia And Herzegovina	1
Türkiye	1
Monaco	1

	Geographical coverage (country)	European Region
0	Austria	Western Europe
1	Austria	Western Europe
2	Belgium	Western Europe
3	Belgium	Western Europe
4	Belgium	Western Europe

	Count	Percentage (%)
European Region
Western Europe	619	34.29
Northern Europe	538	29.81
Southern Europe	500	27.70
Eastern Europe	106	5.87
European Union	42	2.33

	Count	Percentage (%)
Government AI Application Type
Internal Administration	1021	56.57
Citizen Services	784	43.43

	0
Improved Public Service	object
Personalized Services	object
Public (citizen)-centered services	object
Increase quality of PSI and services	object
More responsive, efficient, and cost-effective public services	object
New services or channels	object
Improved Administrative Efficiency	object
Cost-reduction	object
Responsiveness of government operation	object
Improved management of public resources	object
Increased quality of processes and systems	object
Better collaboration and better communication	object
Reduced or eliminated the risk of corruption and abuse of the law by public servants	object
Enabled greater fairness, honesty, equality	object
Open government capabilities	object
Increased transparency of public sector operations	object
Increased public participation in government actions and policy making	object
Improved public control and influence on government actions and policies	object

	Improved Public Service	Personalized Services	Public (citizen)-centered services	Increase quality of PSI and services	More responsive, efficient, and cost-effective public services	New services or channels	Improved Administrative Efficiency	Cost-reduction	Responsiveness of government operation	Improved management of public resources	Increased quality of processes and systems	Better collaboration and better communication	Reduced or eliminated the risk of corruption and abuse of the law by public servants	Enabled greater fairness, honesty, equality	Open government capabilities	Increased transparency of public sector operations	Increased public participation in government actions and policy making	Improved public control and influence on government actions and policies
0	x	NaN	NaN	x	x	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	x	NaN	NaN	x
1	x	NaN	x	x	x	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	x	NaN	x	NaN	x	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	x	x	x	x
3	x	NaN	NaN	x	x	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	x	NaN	NaN	x	x	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	0
Service Impact	3707
Administrative Impact	2571
Governance Impact	400

	GovTech
0	NaN
1	NaN
2	NaN
3	NaN
4	NaN
5	NaN
6	NaN
7	No
8	NaN
9	NaN
10	NaN
11	NaN
12	NaN
13	NaN
14	Yes
15	NaN
16	NaN
17	NaN
18	NaN
19	NaN

	Count	Percentage (%)
Responsible organisation category
Central Government	659	36.51
Local Government	575	31.86
Academic Research	219	12.13
Regional Government	148	8.20
Consortium	119	6.59
Private Sector	45	2.49
European Institution/Agency	20	1.11
Non Governmental	18	1.00
State Owned Entreprise	2	0.11

	Improved Public Service	Personalized Services	Public (citizen)-centered services	Increase quality of PSI and services	More responsive, efficient, and cost-effective public services	New services or channels	Improved Administrative Efficiency	Cost-reduction	Responsiveness of government operation	Improved management of public resources	Increased quality of processes and systems	Better collaboration and better communication	Reduced or eliminated the risk of corruption and abuse of the law by public servants	Enabled greater fairness, honesty, equality	Open government capabilities	Increased transparency of public sector operations	Increased public participation in government actions and policy making	Improved public control and influence on government actions and policies
0	x	NaN	NaN	x	x	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	x	NaN	NaN	x
1	x	NaN	x	x	x	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	x	NaN	x	NaN	x	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	x	x	x	x
3	x	NaN	NaN	x	x	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	x	NaN	NaN	x	x	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	GovTech
0	NaN
1	NaN
2	NaN
3	NaN
4	NaN
5	NaN
6	NaN
7	No
8	NaN
9	NaN
10	NaN
11	NaN
12	NaN
13	NaN
14	Yes
15	NaN
16	NaN
17	NaN
18	NaN
19	NaN

	Improved Public Service	Personalized Services	Public (citizen)-centered services	Increase quality of PSI and services	More responsive, efficient, and cost-effective public services	New services or channels	Improved Administrative Efficiency	Cost-reduction	Responsiveness of government operation	Improved management of public resources	Increased quality of processes and systems	Better collaboration and better communication	Reduced or eliminated the risk of corruption and abuse of the law by public servants	Enabled greater fairness, honesty, equality	Open government capabilities	Increased transparency of public sector operations	Increased public participation in government actions and policy making	Improved public control and influence on government actions and policies
0	x	NaN	NaN	x	x	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	x	NaN	NaN	x
1	x	NaN	x	x	x	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	x	NaN	x	NaN	x	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	x	x	x	x
3	x	NaN	NaN	x	x	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	x	NaN	NaN	x	x	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	GovTech
0	NaN
1	NaN
2	NaN
3	NaN
4	NaN
5	NaN
6	NaN
7	No
8	NaN
9	NaN
10	NaN
11	NaN
12	NaN
13	NaN
14	Yes
15	NaN
16	NaN
17	NaN
18	NaN
19	NaN