summaryrefslogtreecommitdiff
path: root/jimbrella/csv_table.py
diff options
context:
space:
mode:
Diffstat (limited to 'jimbrella/csv_table.py')
-rw-r--r--jimbrella/csv_table.py97
1 files changed, 97 insertions, 0 deletions
diff --git a/jimbrella/csv_table.py b/jimbrella/csv_table.py
new file mode 100644
index 0000000..6f431a7
--- /dev/null
+++ b/jimbrella/csv_table.py
@@ -0,0 +1,97 @@
+import csv
+import os
+from .lockfile import Lockfile
+from .utils import identity
+
+
class CsvTable:
    def __init__(self, path, schema: list[dict]):
        """A generic CSV table of data, and basic I/O operations.

        The first column should be unique. If so, it can be used to index rows, a feature of
        method `_find`.

        Arguments:
        - path   | file path for table.
        - schema | list of dicts, each for a datum field (column).
                 | each dict contains the following keys:
                 | "name": what the column will be called in the dict that `_read` returns.
                 | "serializer": function to be applied to the datum when being written.
                 | "deserializer": same as above except it is when datum is being read.
                 | the latter two are optional, and when they are not specified, the default is
                 | the identity function, i.e. f such that f(x) = x for any x.
        """
        self.path = path
        # Normalize the schema: any missing or None (de)serializer becomes the
        # identity function, so reads/writes can apply them unconditionally.
        for col in schema:
            for func in ("serializer", "deserializer"):
                if func not in col or col[func] is None:
                    col[func] = identity

        # schema: column number -> name and (de)serialization functions
        # reverse_schema: name -> column number
        self.schema = schema
        self.reverse_schema = {schema[i]["name"]: i for i in range(len(schema))}
        self.lockfile = Lockfile(self.path)
        # Create file if it does not yet exist ("x" mode raises if it does).
        try:
            with open(self.path, "x"):
                pass
        except FileExistsError:
            pass

    def _read(self) -> list:
        """Deserialize table.

        Returns a list of dicts, one per row, mapping each schema column name
        to the deserialized datum for that column.
        """
        # newline="" is the csv-module-recommended way to open CSV files.
        with open(self.path, newline="") as f:
            return [
                {
                    sch["name"]: sch["deserializer"](datum)
                    for sch, datum in zip(self.schema, row)
                }
                for row in csv.reader(f)
            ]

    def _write(self, rows: list) -> None:
        """Serialize table. When a failure occurs, abort and recover data."""
        # make backup in memory
        with open(self.path) as f:
            backup = f.read()

        self.lockfile.lock()
        try:
            try:
                with open(self.path, "w", newline="") as f:
                    writer = csv.writer(f)
                    for row in rows:
                        # BUG FIX: apply each column's serializer, as documented
                        # in the schema contract (was writing raw values).
                        writer.writerow(
                            [col["serializer"](row[col["name"]]) for col in self.schema]
                        )
            except Exception:
                # failure occurred on write:
                # abort write, and write back contents as they were before
                # TODO: keep log
                with open(self.path, "w") as f:
                    f.write(backup)
                raise
        finally:
            # BUG FIX: always release the lock; previously an exception during
            # the write skipped unlock() and leaked the lockfile.
            self.lockfile.unlock()

    def _update(self, update: dict) -> list:
        """Update status of one row, and return the entire updated table.

        The row is matched on the first (unique) column; if no row matches,
        the table is rewritten unchanged and `update` is silently dropped.
        """
        rows = self._read()
        index_column = self.schema[0]["name"]
        for idx, row in enumerate(rows):
            if row[index_column] == update[index_column]:
                rows[idx] = update
        self._write(rows)
        return rows