diff --git a/sysadmin/azure-billing/.gitignore b/sysadmin/azure-billing/.gitignore new file mode 100644 --- /dev/null +++ b/sysadmin/azure-billing/.gitignore @@ -0,0 +1,2 @@ +.env +output diff --git a/sysadmin/azure-billing/Dockerfile b/sysadmin/azure-billing/Dockerfile new file mode 100644 --- /dev/null +++ b/sysadmin/azure-billing/Dockerfile @@ -0,0 +1,17 @@ +FROM debian:11 + +# selenium + +RUN apt update && apt install -y python3 python3-pip chromium-driver # python3-selenium python3-tabulate python3-matplotlib python3-jinja2 # python3-pandas +# install pandas through pip to have a recent version + +COPY requirements.txt / +RUN pip install -r /requirements.txt + +COPY entrypoint.sh / + +RUN chmod u+x /entrypoint.sh + +ENTRYPOINT /entrypoint.sh + +WORKDIR /src diff --git a/sysadmin/azure-billing/Readme.md b/sysadmin/azure-billing/Readme.md new file mode 100644 --- /dev/null +++ b/sysadmin/azure-billing/Readme.md @@ -0,0 +1,37 @@ + +# Prerequisite + +The following tools need to be installed to generate the statistics: +- docker +- docker-compose + +# How to generate the statistics + +- Create a `.env` file containing the following lines: +``` +LOGIN= +PASSWORD= +``` +- Launch the following command: +``` +docker-compose up +``` + +It will create a docker volume with inside: +- `AzureUsage.csv`: the raw csv file with one year of history +- index.html +- cost_per_[day|month].[html|md|png] +- cost_per_service_per_[day|month].[html|md|png] + +# Debugging a script + +The container can be launched with this command: +``` +docker run -ti -v $PWD:/data --rm --entrypoint=bash azure-billing_azure_report +``` +Once in the container, any script can be manually launched for debugging purpose. +For example: +``` +docker run -ti -v $PWD:/src --rm --entrypoint=bash azure-billing_azure_report +python3 compute_stats.py output +``` diff --git a/sysadmin/azure-billing/compute_stats.py b/sysadmin/azure-billing/compute_stats.py new file mode 100644 --- /dev/null +++ b/sysadmin/azure-billing/compute_stats.py @@ -0,0 +1,112 @@ +from datetime import datetime +from jinja2 import Environment, FileSystemLoader + +import click +import matplotlib.pyplot as plt +import pandas + +def generate_simple_costs( + data: pandas.DataFrame, + date_format: str, + base_file_name: str) -> None: + + data.reset_index(inplace=True) + + data['Date'] = data['Date'].dt.strftime(date_format) + data.filter(items=['Date', 'Cost']) + generate_data_files(data[['Date', 'Cost']], base_file_name) + + +def pad_series(series: pandas.Series) -> pandas.Series: + return series.astype(str).str.pad(2, fillchar='0') + + +def generate_data_files(data: pandas.DataFrame, base_name: str) -> None: + with open(f"{base_name}.md", 'w') as f: + print(f"Generating {f.name}") + f.write(data.to_markdown(index=False)) + with open(f"{base_name}.html", 'w') as f: + print(f"Generating {f.name}") + f.write(data.to_html( + index=False, + float_format=lambda x: '%10.2f' % x) + ) + + +@click.command() +@click.argument('output_dir', type=click.Path(exists="true"), default="AzureUsage.csv") +def main(output_dir) -> None: + + csv = pandas.read_csv(output_dir + '/AzureUsage.csv', parse_dates=[2]) + + # Cost per day + cost_per_day = csv.groupby('Date', as_index=True).sum() + cost_per_day.plot(y='Cost') + plt.savefig(f"{output_dir}/cost_per_day.png") + + cost_per_day.reset_index(inplace=True) + cost_per_day['Year'] = cost_per_day['Date'].dt.year + cost_per_day['Month'] = cost_per_day['Date'].dt.month + cost_per_day['Day'] = cost_per_day['Date'].dt.day + cost_per_day.sort_values(by=['Year', 'Month', 'Day'], inplace=True, ascending=False) + + generate_simple_costs(cost_per_day, "%Y-%m-%d", output_dir + "/cost_per_day") + + # Cost per month + cost_per_month = csv.groupby(pandas.Grouper(key='Date', freq='M'), as_index=True).sum() + cost_per_month.plot(y='Cost') + plt.savefig(f"{output_dir}/cost_per_month.png") + + cost_per_month.reset_index(inplace=True) + cost_per_month['Year'] = cost_per_month['Date'].dt.year + cost_per_month['Month'] = cost_per_month['Date'].dt.month + cost_per_month.sort_values(by=['Year', 'Month'], inplace=True, ascending=False) + + generate_simple_costs(cost_per_month, "%Y-%m", output_dir + "/cost_per_month") + + # Cost per service per month + cost_per_service = csv.copy() + cost_per_service['Year'] = cost_per_service['Date'].dt.year + cost_per_service['Month'] = cost_per_service['Date'].dt.month + cost_per_service['Day'] = cost_per_service['Date'].dt.day + cost_per_service_per_month = cost_per_service.groupby(['Year', 'Month', 'ServiceName', 'ServiceResource']).sum() + cost_per_service_per_month.reset_index(inplace=True) + cost_per_service_per_month.sort_values(by=['Year', 'Month','Cost'], inplace=True, ascending=False) + cost_per_service_per_month['Date'] = cost_per_service_per_month['Year'].astype(str) + \ + '-' + \ + pad_series(cost_per_service_per_month['Month']) + + generate_data_files( + cost_per_service_per_month[['Date','ServiceName', 'ServiceResource', 'Cost']], + output_dir + "/cost_per_service_per_month") + + # Cost per service per day + cost_per_service_per_day = cost_per_service.groupby(['Year', 'Month', 'Day', 'ServiceName', 'ServiceResource']).sum() + cost_per_service_per_day.reset_index(inplace=True) + cost_per_service_per_day.sort_values(by=['Year', 'Month', 'Day', 'Cost'], inplace=True, ascending=False) + cost_per_service_per_day['Date'] = cost_per_service_per_day['Year'].astype(str) + \ + '-' + \ + pad_series(cost_per_service_per_day['Month']) + \ + '-' + \ + pad_series(cost_per_service_per_day['Day']) + + generate_data_files(cost_per_service_per_day[['Date','ServiceName', 'ServiceResource', 'Cost']], output_dir + "/cost_per_service_per_day") + + ## + # index.html page generation + ## + index_file_name = f"{output_dir}/index.html" + print(f"Generating {index_file_name}") + + generated_date = datetime.now() + + template_file_loader = FileSystemLoader(searchpath='./') + env = Environment(loader=template_file_loader) + template = env.get_template('index.html.tmpl') + index = template.render(generated_date=generated_date) + + with open(index_file_name, 'w') as f: + f.write(index) + +if __name__ == '__main__': + main() diff --git a/sysadmin/azure-billing/docker-compose.yml b/sysadmin/azure-billing/docker-compose.yml new file mode 100644 --- /dev/null +++ b/sysadmin/azure-billing/docker-compose.yml @@ -0,0 +1,33 @@ +version: '3.4' + +volumes: + azure_billing_data: + +services: + azure_csv_cleanup: + image: debian:11 + volumes: + - azure_billing_data:/output + command: rm -fv /output/AzureUsage.csv + azure_report: + build: . + env_file: + - .env + volumes: + - $PWD:/src + - azure_billing_data:/output + environment: + - LOGIN=${LOGIN} + - PASSWORD=${PASSWORD} + - DEBUG=0 + stop_grace_period: 120s + depends_on: + - azure_csv_cleanup + nginx: + image: nginx + stop_grace_period: 120s + container_name: nginx + ports: + - 80:80 + volumes: + - azure_billing_data:/usr/share/nginx/html diff --git a/sysadmin/azure-billing/entrypoint.sh b/sysadmin/azure-billing/entrypoint.sh new file mode 100644 --- /dev/null +++ b/sysadmin/azure-billing/entrypoint.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +set -e + +CSV_FILE="AzureUsage.csv" + +mkdir -p /output +pushd /output + +if [ ! -e "${CSV_FILE}" ]; then + echo "Getting ${CSV_FILE}..." + python3 /src/getcsv.py +else + echo "${CSV_FILE} already exists" +fi + +echo Compiling usage statitics... + +popd + +python3 /src/compute_stats.py /output diff --git a/sysadmin/azure-billing/getcsv.py b/sysadmin/azure-billing/getcsv.py new file mode 100644 --- /dev/null +++ b/sysadmin/azure-billing/getcsv.py @@ -0,0 +1,107 @@ +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.common.keys import Keys +from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC +from datetime import datetime, timedelta + +import os +import time + +# To always have a complete month +START_DATE_FORMAT = "%Y-%m-01" +END_DATE_FORMAT = "%Y-%m-%d" +BASE_SPONSORSHIP_URL = "https://www.microsoftazuresponsorships.com" +EXPECTED_FILE = "AzureUsage.csv" + +def wait_for_download(): + MAX_COUNT = 10 + print("Waiting for download", end="") + count = 0 + + while not os.path.exists(EXPECTED_FILE) and count < MAX_COUNT: + time.sleep(2) + print(".", end="") + count += 1 + if count >= MAX_COUNT: + raise Exception("File not found") + print("") + print("done!") + +if __name__ == '__main__': + login = os.environ.get("LOGIN") + password = os.environ.get("PASSWORD") + DEBUG = os.environ.get("DEBUG") in ["1", "true"] + + assert login is not None + assert password is not None + + now = datetime.now() + last_year = now - timedelta(365) + end = time.strftime(END_DATE_FORMAT, now.timetuple()) + start = time.strftime(START_DATE_FORMAT, last_year.timetuple()) + + print(f"Retrieving consumption from {start} to {end}") + + CSV_URL = f"{BASE_SPONSORSHIP_URL}/Usage/DownloadUsage?startDate={start}&endDate={end}&fileType=csv" + print(f"CSV url: {CSV_URL}") + + options = webdriver.ChromeOptions() + options.add_argument("no-sandbox") + options.add_argument("--disable-dev-shm-usage") + options.add_argument("--window-size=800,600") + options.add_argument("--headless") + + driver = webdriver.Chrome(options=options) + + print("Going to the portal login page...") + driver.get(f"{BASE_SPONSORSHIP_URL}/Account/Login") + wait = WebDriverWait(driver, 10) + + wait.until(EC.visibility_of_element_located((By.NAME, "loginfmt"))) + + print("Entering login...") + loginInput = driver.find_element(by=By.NAME, value="loginfmt") + loginInput.send_keys(login, Keys.ENTER) + if DEBUG: + driver.save_screenshot("user.png") + + wait.until(EC.visibility_of_element_located((By.NAME, "passwd"))) + + print("Entering password...") + + passwordInput = driver.find_element(by=By.NAME, value="passwd") + + try: + passwordInput.send_keys(password, Keys.ENTER) + finally: + if DEBUG: + driver.save_screenshot("password.png") + + print("Waiting for stay signed page...") + + try: + wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "form"))) + finally: + if DEBUG: + driver.save_screenshot("staysigned.png") + + print("On stay signed page") + button = driver.find_element(by=By.CSS_SELECTOR, value="input[value='No']") + button.send_keys(Keys.ENTER) + + print("Waiting for home page") + try: + wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.pagecontent"))) + finally: + if DEBUG: + driver.save_screenshot("sponsorships-home.png") + + print("Downloading usage summary csv") + driver.get(CSV_URL) + + wait_for_download() + + print(f"Usage csv file downloaded and available in the {EXPECTED_FILE} file") + + driver.close() diff --git a/sysadmin/azure-billing/index.html.tmpl b/sysadmin/azure-billing/index.html.tmpl new file mode 100644 --- /dev/null +++ b/sysadmin/azure-billing/index.html.tmpl @@ -0,0 +1,30 @@ + + + + + Azure reporting ({{ generated_date.strftime('%Y-%m-%d') }}) + + +

Azure reporting until the {{ generated_date.strftime('%Y-%m-%d') }}

+

Cost per day

+

+ +
+ Raw data: html / markdown +

+

Cost per month

+

+ +
+ Raw sdata: html / markdown +

+ +

Cost per service per day

+

Raw data: html / markdown

+ +

Cost per service per month

+

Raw data: html / markdown

+ +

generation date: {{ generated_date }}

+ + diff --git a/sysadmin/azure-billing/requirements.txt b/sysadmin/azure-billing/requirements.txt new file mode 100644 --- /dev/null +++ b/sysadmin/azure-billing/requirements.txt @@ -0,0 +1,6 @@ +selenium +pandas +tabulate +matplotlib +jinja2 +click