From 851d60ef6ffe1cf2200f713ba1a38108fcede660 Mon Sep 17 00:00:00 2001
From: Radon Rosborough
Date: Sun, 29 Aug 2021 09:38:05 -0700
Subject: [PATCH] Financials

---
 .gitignore                |   1 +
 financials/fin.py         | 126 +++++++++++++++++++++++++++++++++++
 financials/poetry.lock    | 135 ++++++++++++++++++++++++++++++++++++++
 financials/pyproject.toml |  16 +++++
 4 files changed, 278 insertions(+)
 create mode 100755 financials/fin.py
 create mode 100644 financials/poetry.lock
 create mode 100644 financials/pyproject.toml

diff --git a/.gitignore b/.gitignore
index d0df151..40f72a3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@ build
 node_modules
 out
 sentinel.h
+financials/????-??
diff --git a/financials/fin.py b/financials/fin.py
new file mode 100755
index 0000000..6ed42f1
--- /dev/null
+++ b/financials/fin.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+"""Fetch a monthly billing report from S3 and scan its line items."""
+
+import argparse
+import csv
+import decimal
+import gzip
+import io
+import json
+import logging
+import os
+import pathlib
+import shutil
+import sys
+from urllib.parse import urlparse
+
+import boto3
+
+logging.basicConfig(level=logging.INFO)
+
+ROOT = pathlib.Path(__file__).parent
+
+
+def die(msg):
+    """Abort by raising AssertionError with the given message."""
+    raise AssertionError(msg)
+
+
+def get_csv(year, month, force_download=False):
+    """Return the path to the billing CSV for a month, downloading if needed.
+
+    The report is located under the s3:// URL in BILLING_REPORTS_URL and
+    stored in a YYYY-MM directory next to this script, where latest.csv is
+    a symlink to the decompressed report. An existing latest.csv is reused
+    unless force_download is true.
+    """
+    target_dir = ROOT / f"{year}-{month:02d}"
+    logging.info(f"Using base directory {target_dir}")
+    target_dir.mkdir(exist_ok=True)
+    latest_csv = target_dir / "latest.csv"
+    if force_download or not latest_csv.exists():
+        try:
+            latest_csv.unlink()
+        except FileNotFoundError:
+            pass
+        s3 = boto3.client("s3")
+        o = urlparse(os.environ["BILLING_REPORTS_URL"], allow_fragments=False)
+        # Validate with die() rather than assert, which python -O strips.
+        if o.scheme != "s3":
+            die(f"BILLING_REPORTS_URL must be an s3:// URL, got {o.scheme!r}")
+        bucket = o.netloc
+        base_prefix = o.path.strip("/") + "/"
+        report_name = base_prefix.rstrip("/").split("/")[-1]
+        logging.info(f"List s3://{bucket}/{base_prefix}")
+        listing = s3.list_objects_v2(Bucket=bucket, Prefix=base_prefix, Delimiter="/")
+        # S3 leaves "CommonPrefixes" out of the response when there are none.
+        month_prefixes = [elt["Prefix"] for elt in listing.get("CommonPrefixes", [])]
+        if not month_prefixes:
+            die("no report prefixes found")
+        expected_month_prefix = f"{base_prefix}{year}{month:02d}"
+        matching_month_prefixes = [
+            p for p in month_prefixes if p.startswith(expected_month_prefix)
+        ]
+        if not matching_month_prefixes:
+            die(f"no report prefix for the specified month ({expected_month_prefix})")
+        if len(matching_month_prefixes) > 1:
+            die(f"multiple matching report prefixes: {repr(matching_month_prefixes)}")
+        (month_prefix,) = matching_month_prefixes
+        stream = io.BytesIO()
+        manifest_path = f"{month_prefix}{report_name}-Manifest.json"
+        logging.info(f"Download s3://{bucket}/{manifest_path}")
+        s3.download_fileobj(bucket, manifest_path, stream)
+        manifest = json.loads(stream.getvalue())
+        (report_path,) = manifest["reportKeys"]
+        if not report_path.endswith(".csv.gz"):
+            die(f"unexpected report extension in {report_path}")
+        basename = pathlib.Path(report_path).name.removesuffix(".csv.gz")
+        logging.info(f"Download s3://{bucket}/{report_path}")
+        s3.download_file(bucket, report_path, f"{target_dir}/{basename}.csv.gz")
+        logging.info(f"Decompress {basename}.csv.gz")
+        with gzip.open(f"{target_dir}/{basename}.csv.gz") as f_read:
+            with open(f"{target_dir}/{basename}.csv", "wb") as f_write:
+                shutil.copyfileobj(f_read, f_write)
+        # The link target is relative to the month directory.
+        latest_csv.symlink_to(f"{basename}.csv")
+    return latest_csv
+
+
+def read_csv(csv_path):
+    """Read a CSV file with a header row into a list of dicts, one per row."""
+    # newline="" is what the csv module asks for when it is handed a file.
+    with open(csv_path, newline="") as f:
+        return list(csv.DictReader(f))
+
+
+def classify_costs(csv_path):
+    """Return the rows of the billing CSV that have a nonzero unblended cost.
+
+    TODO: the rows are not classified yet; this only filters out the rows
+    whose lineItem/UnblendedCost is zero.
+    """
+    return [
+        item
+        for item in read_csv(csv_path)
+        if decimal.Decimal(item["lineItem/UnblendedCost"])
+    ]
+
+
+def main():
+    """Fetch the report for the month given on the command line and scan it."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("date", help="billing month as YYYY-MM")
+    parser.add_argument("-f", "--force-download", action="store_true")
+    args = parser.parse_args()
+    try:
+        year, month = map(int, args.date.split("-"))
+    except ValueError:
+        parser.error(f"invalid date {args.date!r}, expected YYYY-MM")
+    csv_path = get_csv(year, month, force_download=args.force_download)
+    items = classify_costs(csv_path)
+    logging.info(f"Found {len(items)} line items with nonzero cost")
+
+
+if __name__ == "__main__":
+    main()
+    sys.exit(0)
diff --git a/financials/poetry.lock b/financials/poetry.lock
new file mode 100644
index 0000000..c9ec56f
--- /dev/null
+++ b/financials/poetry.lock
@@ -0,0 +1,135 @@
+[[package]]
+name = "boto3"
+version = "1.18.23"
+description = "The AWS SDK for Python"
+category = "main"
+optional = false
+python-versions = ">= 3.6"
+
+[package.dependencies]
+botocore = ">=1.21.23,<1.22.0"
+jmespath = ">=0.7.1,<1.0.0"
+s3transfer = ">=0.5.0,<0.6.0"
+
+[package.extras]
+crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
+
+[[package]]
+name = "botocore"
+version = "1.21.23"
+description = "Low-level, data-driven core of boto 3."
+category = "main"
+optional = false
+python-versions = ">= 3.6"
+
+[package.dependencies]
+jmespath = ">=0.7.1,<1.0.0"
+python-dateutil = ">=2.1,<3.0.0"
+urllib3 = ">=1.25.4,<1.27"
+
+[package.extras]
+crt = ["awscrt (==0.11.24)"]
+
+[[package]]
+name = "jmespath"
+version = "0.10.0"
+description = "JSON Matching Expressions"
+category = "main"
+optional = false
+python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
+
+[[package]]
+name = "python-dateutil"
+version = "2.8.2"
+description = "Extensions to the standard Python datetime module"
+category = "main"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
+
+[package.dependencies]
+six = ">=1.5"
+
+[[package]]
+name = "python-dotenv"
+version = "0.19.0"
+description = "Read key-value pairs from a .env file and set them as environment variables"
+category = "main"
+optional = false
+python-versions = ">=3.5"
+
+[package.extras]
+cli = ["click (>=5.0)"]
+
+[[package]]
+name = "s3transfer"
+version = "0.5.0"
+description = "An Amazon S3 Transfer Manager"
+category = "main"
+optional = false
+python-versions = ">= 3.6"
+
+[package.dependencies]
+botocore = ">=1.12.36,<2.0a.0"
+
+[package.extras]
+crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"]
+
+[[package]]
+name = "six"
+version = "1.16.0"
+description = "Python 2 and 3 compatibility utilities"
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
+
+[[package]]
+name = "urllib3"
+version = "1.26.6"
+description = "HTTP library with thread-safe connection pooling, file post, and more."
+category = "main"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4"
+
+[package.extras]
+brotli = ["brotlipy (>=0.6.0)"]
+secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"]
+socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
+
+[metadata]
+lock-version = "1.1"
+python-versions = "^3.9"
+content-hash = "170b0bcf9f0ae12c4c9e1daa195ecdb39585494414b88e53e3da72916eb52c51"
+
+[metadata.files]
+boto3 = [
+    {file = "boto3-1.18.23-py3-none-any.whl", hash = "sha256:1b08ace99e7b92965780e5ce759430ad62b7b7e037560bc772f9a8789f4f36d2"},
+    {file = "boto3-1.18.23.tar.gz", hash = "sha256:31cc69e665f773390c4c17ce340d2420e45fbac51d46d945cc4a58d483ec5da6"},
+]
+botocore = [
+    {file = "botocore-1.21.23-py3-none-any.whl", hash = "sha256:3877d69e0b718b786f1696cd04ddbdb3a57aef6adb0239a29aa88754489849a4"},
+    {file = "botocore-1.21.23.tar.gz", hash = "sha256:d0146d31dbc475942b578b47dd5bcf94d18fbce8c6d2ce5f12195e005de9b754"},
+]
+jmespath = [
+    {file = "jmespath-0.10.0-py2.py3-none-any.whl", hash = "sha256:cdf6525904cc597730141d61b36f2e4b8ecc257c420fa2f4549bac2c2d0cb72f"},
+    {file = "jmespath-0.10.0.tar.gz", hash = "sha256:b85d0567b8666149a93172712e68920734333c0ce7e89b78b3e987f71e5ed4f9"},
+]
+python-dateutil = [
+    {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
+    {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
+]
+python-dotenv = [
+    {file = "python-dotenv-0.19.0.tar.gz", hash = "sha256:f521bc2ac9a8e03c736f62911605c5d83970021e3fa95b37d769e2bbbe9b6172"},
+    {file = "python_dotenv-0.19.0-py2.py3-none-any.whl", hash = "sha256:aae25dc1ebe97c420f50b81fb0e5c949659af713f31fdb63c749ca68748f34b1"},
+]
+s3transfer = [
+    {file = "s3transfer-0.5.0-py3-none-any.whl", hash = "sha256:9c1dc369814391a6bda20ebbf4b70a0f34630592c9aa520856bf384916af2803"},
+    {file = "s3transfer-0.5.0.tar.gz", hash = "sha256:50ed823e1dc5868ad40c8dc92072f757aa0e653a192845c94a3b676f4a62da4c"},
+]
+six = [
+    {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
+    {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
+]
+urllib3 = [
+    {file = "urllib3-1.26.6-py2.py3-none-any.whl", hash = "sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4"},
+    {file = "urllib3-1.26.6.tar.gz", hash = "sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f"},
+]
diff --git a/financials/pyproject.toml b/financials/pyproject.toml
new file mode 100644
index 0000000..cb626d9
--- /dev/null
+++ b/financials/pyproject.toml
@@ -0,0 +1,16 @@
+[tool.poetry]
+name = "riju-financials"
+version = "0.1.0"
+description = "Financial data for Riju hosting"
+authors = ["Radon Rosborough "]
+
+[tool.poetry.dependencies]
+python = "^3.9"
+boto3 = "^1.18.23"
+python-dotenv = "^0.19.0"
+
+[tool.poetry.dev-dependencies]
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"