diff --git a/sysadmin/azure-billing/.gitignore b/sysadmin/azure-billing/.gitignore new file mode 100644 --- /dev/null +++ b/sysadmin/azure-billing/.gitignore @@ -0,0 +1,2 @@ +.env +output diff --git a/sysadmin/azure-billing/Dockerfile b/sysadmin/azure-billing/Dockerfile new file mode 100644 --- /dev/null +++ b/sysadmin/azure-billing/Dockerfile @@ -0,0 +1,12 @@ +FROM debian:11 + +# selenium +RUN apt update && apt install -y python3 python3-pip python3-selenium python3-tabulate python3-matplotlib python3-jinja2 # python3-pandas +# install pandas through pip to have a recent version +RUN pip install selenium pandas + +COPY entrypoint.sh / + +RUN chmod u+x /entrypoint.sh + +ENTRYPOINT /entrypoint.sh diff --git a/sysadmin/azure-billing/Readme.md b/sysadmin/azure-billing/Readme.md new file mode 100644 --- /dev/null +++ b/sysadmin/azure-billing/Readme.md @@ -0,0 +1,37 @@ + +# Prerequisite + +The following tools needs to be installed to generate the statistics: +- docker +- docker-compose + +# How to generate the statistics + +- Create a `.env` file containing the following lines: +``` +LOGIN= +PASSWORD= +``` +- Launch the following command: +``` +docker-compose up +``` + +It will create an output directory with inside: +- `AzureUsage.csv`: the raw csv file with one year of history +- cost_per_[day|month].[md|png] +- cost_per_service_per_[day|month].[md|png] + +# Debugging a script + +The container can be launched with this command: +``` +docker run -ti -v $PWD:/data --rm --entrypoint=bash azure-billing_azure +``` +Once in the container, any script can be manually launched for debugging purpose. +For example: +``` +docker run -ti -v $PWD:/data --rm --entrypoint=bash azure-billing_azure +cd /data/output +python3 compute_stats.py +``` diff --git a/sysadmin/azure-billing/compute_stats.py b/sysadmin/azure-billing/compute_stats.py new file mode 100644 --- /dev/null +++ b/sysadmin/azure-billing/compute_stats.py @@ -0,0 +1,66 @@ +import csv +import matplotlib.pyplot as plt +import pandas +import os + +from datetime import datetime + +def generate_simple_costs( + data: pandas.DataFrame, + date_format: str, + image_name: str, + markdown_name:str) -> None: + + data.plot(y='Cost') + plt.savefig(image_name) + + data.reset_index(inplace=True) + + data['Date'] = data['Date'].dt.strftime(date_format) + data.filter(items=['Date', 'Cost']) + with open(markdown_name, 'w') as md: + md.write(data[['Date', 'Cost']].to_markdown(index=False)) + +def pad_series(series: pandas.Series) -> pandas.Series: + return series.astype(str).str.pad(2, fillchar='0') + +filename = "AzureUsage.csv" + +csv = pandas.read_csv('AzureUsage.csv', parse_dates=[2]) + +# Cost per day +cost_per_day = csv.groupby('Date', as_index=True).sum() +generate_simple_costs(cost_per_day, "%Y-%m-%d", "cost_per_day.png", 'cost_per_day.md') + +# Cost per month +cost_per_month = csv.groupby(pandas.Grouper(key='Date', freq='M'), as_index=True).sum() +generate_simple_costs(cost_per_month, "%Y-%m", "cost_per_month.png", "cost_per_month.md") + +# Cost per service per month +cost_per_service = csv.copy() +cost_per_service['Year'] = cost_per_service['Date'].dt.year +cost_per_service['Month'] = cost_per_service['Date'].dt.month +cost_per_service['Day'] = cost_per_service['Date'].dt.day +cost_per_service_per_month = cost_per_service.groupby(['Year', 'Month', 'ServiceName', 'ServiceResource']).sum() +cost_per_service_per_month.reset_index(inplace=True) +cost_per_service_per_month.sort_values(by=['Year', 'Month','Cost'], inplace=True, ascending=False) +cost_per_service_per_month['Date'] = cost_per_service_per_month['Year'].astype(str) + \ + '-' + \ + pad_series(cost_per_service_per_month['Month']) + +with open('cost_per_service_per_month.md', 'w') as md: + md.write(cost_per_service_per_month[['Date','ServiceName', 'ServiceResource', 'Cost']].to_markdown(index=False)) + + +# Cost per service per day +cost_per_service_per_day = cost_per_service.groupby(['Year', 'Month', 'Day', 'ServiceName', 'ServiceResource']).sum() +cost_per_service_per_day.reset_index(inplace=True) +cost_per_service_per_day.sort_values(by=['Year', 'Month', 'Day', 'Cost'], inplace=True, ascending=False) +cost_per_service_per_day['Date'] = cost_per_service_per_day['Year'].astype(str) + \ + '-' + \ + pad_series(cost_per_service_per_day['Month']) + \ + '-' + \ + pad_series(cost_per_service_per_day['Day']) + +with open('cost_per_service_per_day.md', 'w') as md: + md.write(cost_per_service_per_day[['Date','ServiceName', 'ServiceResource', 'Cost']].to_markdown(index=False)) diff --git a/sysadmin/azure-billing/docker-compose.yml b/sysadmin/azure-billing/docker-compose.yml new file mode 100644 --- /dev/null +++ b/sysadmin/azure-billing/docker-compose.yml @@ -0,0 +1,12 @@ +version: '3' + +services: + azure: + build: . + volumes: + - $PWD:/src + - $PWD/output:/output + environment: + - LOGIN=${LOGIN} + - PASSWORD=${PASSWORD} + - DEBUG=0 diff --git a/sysadmin/azure-billing/entrypoint.sh b/sysadmin/azure-billing/entrypoint.sh new file mode 100644 --- /dev/null +++ b/sysadmin/azure-billing/entrypoint.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +set -e + +CSV_FILE="AzureUsage.csv" + +mkdir -p /output +cd /output + +if [ ! -e "${CSV_FILE}" ]; then + echo "Getting ${CSV_FILE}..." + python3 /src/getcsv.py +else + echo "${CSV_FILE} already exists" +fi + +echo Compiling usage statitics... + +python3 /src/compute_stats.py diff --git a/sysadmin/azure-billing/getcsv.py b/sysadmin/azure-billing/getcsv.py new file mode 100644 --- /dev/null +++ b/sysadmin/azure-billing/getcsv.py @@ -0,0 +1,107 @@ +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.common.keys import Keys +from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC +from datetime import datetime, timedelta +import calendar + +import os +import time + +# To always have a complete month +START_DATE_FORMAT = "%Y-%m-01" +END_DATE_FORMAT = "%Y-%m-%d" +BASE_SPONSORSHIP_URL = "https://www.microsoftazuresponsorships.com" +EXPECTED_FILE = "AzureUsage.csv" + +def wait_for_download(): + MAX_COUNT = 10 + print("Waiting for download", end="") + count = 0 + + while not os.path.exists(EXPECTED_FILE) and count < MAX_COUNT: + time.sleep(2) + print(".", end="") + count += 1 + if count >= MAX_COUNT: + raise Exception("File not found") + print("") + print("done!") + +login = os.environ.get("LOGIN") +password = os.environ.get("PASSWORD") +DEBUG = os.environ.get("DEBUG") in ["1", "true"] + +assert login is not None +assert password is not None + +now = datetime.now() +last_year = now - timedelta(365) +end = time.strftime(END_DATE_FORMAT, now.timetuple()) +start = time.strftime(START_DATE_FORMAT, last_year.timetuple()) + +print(f"Retrieving consumption from {start} to {end}") + +CSV_URL = f"{BASE_SPONSORSHIP_URL}/Usage/DownloadUsage?startDate={start}&endDate={end}&fileType=csv" +print(f"CSV url: {CSV_URL}") + +options = webdriver.ChromeOptions() +options.add_argument("no-sandbox") +options.add_argument("--disable-dev-shm-usage") +options.add_argument("--window-size=800,600") +options.add_argument("--headless") + +driver = webdriver.Chrome(options=options) + +print("Going to the portal login page...") +driver.get(f"{BASE_SPONSORSHIP_URL}/Account/Login") +wait = WebDriverWait(driver, 10) + +wait.until(EC.visibility_of_element_located((By.NAME, "loginfmt"))) + +print("Entering login...") +loginInput = driver.find_element_by_name("loginfmt") +loginInput.send_keys(login, Keys.ENTER) +if DEBUG: + driver.save_screenshot("user.png") + +wait.until(EC.visibility_of_element_located((By.NAME, "passwd"))) + +print("Entering password...") + +passwordInput = driver.find_element_by_name("passwd") + +try: + passwordInput.send_keys(password, Keys.ENTER) +finally: + if DEBUG: + driver.save_screenshot("password.png") + +print("Waiting for stay signed page...") + +try: + wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "form"))) +finally: + if DEBUG: + driver.save_screenshot("staysigned.png") + +print("On stay signed page") +button = driver.find_element_by_css_selector("input[value='No']") +button.send_keys(Keys.ENTER) + +print("Waiting for home page") +try: + wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.pagecontent"))) +finally: + if DEBUG: + driver.save_screenshot("sponsorships-home.png") + +print("Downloading usage summary csv") +driver.get(CSV_URL) + +wait_for_download() + +print(f"Usage csv file downloaded and available in the {EXPECTED_FILE} file") + +driver.close()