In this assessment, you will implement a simplified version of our core product. We provide a few starter classes (Column, DataWarehouseTable, DataWarehouse), and your task is to implement SigmaTable.
Background
Sigma is a user interface that sits on top of various cloud data warehouses. We provide a unified way to interact with your data, despite differences in warehouse implementations. Data warehouses contain tables, usually similar to SQL tables.
A SigmaTable has rows and columns, in which you can create columns that are functions of other columns (e.g. [My Sum Col] : [Data 1] + [Data 2]). We also have the ability to add summaries to our tables, which are aggregations (like Min or Max) of all values in a column.
Implementation details (must follow)
- For security reasons, do not store any warehouse data other than the
get_data_warehousemethod in theSigmaTableclass. Do not store data computed from warehouse data in theSigmaTableeither. - Do not store additional data in the data warehouse.
- Trying to use a missing column should raise a
ValueError. - Missing data should be treated as
0. - You can store whatever metadata you want in the
SigmaTable. - In cases of ambiguity, make reasonable choices.
Example
A SigmaTable named My Cool Data with Col A = [1,2,3], Col B = [4,5,6], computed A + B = [5, 7, 9]. Summary widgets show Max of Col A = 3, Min of A + B = 5. Formulas reference columns by [Col A] or by [My Cool Warehouse/Col A] (full path).
解法
只存列元数据(名字、公式),不缓存行数据。get_value(col, row) 时用 [Name] / [Warehouse/Table/Col] 正则解析公式,递归解引用(本表计算列、同库其他表列、warehouse 取数)。列不存在抛 ValueError;数据缺失返回 0。汇总函数遍历 row in 0..n-1 累计。单次访问复杂度 O(formula_depth × refs)。
import re
from typing import Callable, Optional
class Column:
def __init__(self, name: str, formula: Optional[str] = None):
self.name = name
self.formula = formula
class SigmaTable:
REF = re.compile(r'\[([^\]]+)\]')
def __init__(self, name: str, warehouse_table_path: str, get_data_warehouse: Callable):
self.name = name
self.warehouse_table_path = warehouse_table_path
self.get_data_warehouse = get_data_warehouse
self.columns: dict[str, Column] = {}
self.summaries: list[tuple[str, str]] = []
def add_data_column(self, name: str):
self.columns[name] = Column(name)
def add_formula_column(self, name: str, formula: str):
self.columns[name] = Column(name, formula)
def add_summary(self, kind: str, column: str):
if column not in self.columns:
raise ValueError(f"Unknown column: {column}")
self.summaries.append((kind, column))
def num_rows(self) -> int:
wh, table = self.warehouse_table_path.split('/', 1)
return self.get_data_warehouse(wh).get_table(table).num_rows()
def cell(self, col_name: str, row: int):
if col_name not in self.columns:
raise ValueError(f"Unknown column: {col_name}")
col = self.columns[col_name]
if col.formula is None:
wh, table = self.warehouse_table_path.split('/', 1)
try:
return self.get_data_warehouse(wh).get_table(table).cell(col_name, row)
except (KeyError, IndexError):
return 0
return self._eval_formula(col.formula, row)
def _eval_formula(self, formula: str, row: int):
def repl(m):
ref = m.group(1)
if '/' in ref:
wh, rest = ref.split('/', 1)
table, col_name = rest.rsplit('/', 1) if '/' in rest else (rest, rest)
try:
return str(self.get_data_warehouse(wh).get_table(table).cell(col_name, row))
except (KeyError, IndexError):
return '0'
return str(self.cell(ref, row))
expr = self.REF.sub(repl, formula)
try:
return eval(expr, {'__builtins__': {}}, {})
except Exception:
return 0
def column_values(self, col_name: str):
n = self.num_rows()
return [self.cell(col_name, r) for r in range(n)]
def summary_value(self, kind: str, col_name: str):
vals = self.column_values(col_name)
if not vals:
return 0
if kind == 'Max': return max(vals)
if kind == 'Min': return min(vals)
if kind == 'Sum': return sum(vals)
if kind == 'Avg': return sum(vals) / len(vals)
raise ValueError(f"Unknown summary: {kind}")import java.util.*;
import java.util.function.Function;
import java.util.regex.*;
class Column {
String name;
String formula;
Column(String name, String formula) { this.name = name; this.formula = formula; }
}
public class SigmaTable {
private static final Pattern REF = Pattern.compile("\\[([^\\]]+)\\]");
private final String name;
private final String warehouseTablePath;
private final Function<String, DataWarehouse> getDataWarehouse;
private final Map<String, Column> columns = new LinkedHashMap<>();
private final List<String[]> summaries = new ArrayList<>();
public SigmaTable(String name, String path, Function<String, DataWarehouse> getDW) {
this.name = name;
this.warehouseTablePath = path;
this.getDataWarehouse = getDW;
}
public void addDataColumn(String name) { columns.put(name, new Column(name, null)); }
public void addFormulaColumn(String name, String formula) { columns.put(name, new Column(name, formula)); }
public void addSummary(String kind, String col) {
if (!columns.containsKey(col)) throw new IllegalArgumentException("Unknown column: " + col);
summaries.add(new String[]{kind, col});
}
public int numRows() {
String[] parts = warehouseTablePath.split("/", 2);
return getDataWarehouse.apply(parts[0]).getTable(parts[1]).numRows();
}
public Object cell(String colName, int row) {
if (!columns.containsKey(colName)) throw new IllegalArgumentException("Unknown column: " + colName);
Column col = columns.get(colName);
if (col.formula == null) {
String[] parts = warehouseTablePath.split("/", 2);
try {
return getDataWarehouse.apply(parts[0]).getTable(parts[1]).cell(colName, row);
} catch (Exception e) { return 0; }
}
return evalFormula(col.formula, row);
}
private Object evalFormula(String formula, int row) {
Matcher m = REF.matcher(formula);
StringBuffer sb = new StringBuffer();
while (m.find()) {
String ref = m.group(1);
Object v;
if (ref.contains("/")) {
String[] segs = ref.split("/");
try {
v = getDataWarehouse.apply(segs[0]).getTable(segs[1]).cell(segs[segs.length - 1], row);
} catch (Exception e) { v = 0; }
} else {
v = cell(ref, row);
}
m.appendReplacement(sb, v.toString());
}
m.appendTail(sb);
return evalArithmetic(sb.toString());
}
private double evalArithmetic(String expr) {
return new Object() {
int pos = -1, ch;
void next() { ch = (++pos < expr.length()) ? expr.charAt(pos) : -1; }
boolean eat(int c) { while (ch == ' ') next(); if (ch == c) { next(); return true; } return false; }
double parse() { next(); return expression(); }
double expression() {
double x = term();
for (;;) {
if (eat('+')) x += term();
else if (eat('-')) x -= term();
else return x;
}
}
double term() {
double x = factor();
for (;;) {
if (eat('*')) x *= factor();
else if (eat('/')) x /= factor();
else return x;
}
}
double factor() {
if (eat('+')) return factor();
if (eat('-')) return -factor();
int start = pos;
while ((ch >= '0' && ch <= '9') || ch == '.') next();
return Double.parseDouble(expr.substring(start, pos));
}
}.parse();
}
}
interface DataWarehouse { DataWarehouseTable getTable(String t); }
interface DataWarehouseTable { int numRows(); Object cell(String c, int r); }#include <bits/stdc++.h>
using namespace std;
struct Column { string name, formula; bool hasFormula; };
class DataWarehouseTable {
public:
virtual int numRows() = 0;
virtual double cell(const string& c, int r) = 0;
};
class DataWarehouse {
public:
virtual DataWarehouseTable* getTable(const string& t) = 0;
};
class SigmaTable {
string name, warehousePath;
function<DataWarehouse*(const string&)> getDW;
map<string, Column> columns;
vector<pair<string, string>> summaries;
public:
SigmaTable(string n, string p, function<DataWarehouse*(const string&)> g)
: name(n), warehousePath(p), getDW(g) {}
void addDataColumn(const string& n) { columns[n] = {n, "", false}; }
void addFormulaColumn(const string& n, const string& f) { columns[n] = {n, f, true}; }
void addSummary(const string& kind, const string& col) {
if (!columns.count(col)) throw invalid_argument("Unknown column: " + col);
summaries.push_back({kind, col});
}
int numRows() {
auto pos = warehousePath.find('/');
return getDW(warehousePath.substr(0, pos))->getTable(warehousePath.substr(pos + 1))->numRows();
}
double cell(const string& colName, int row) {
if (!columns.count(colName)) throw invalid_argument("Unknown column: " + colName);
auto& col = columns[colName];
if (!col.hasFormula) {
auto pos = warehousePath.find('/');
try { return getDW(warehousePath.substr(0, pos))->getTable(warehousePath.substr(pos + 1))->cell(colName, row); }
catch (...) { return 0; }
}
return evalFormula(col.formula, row);
}
private:
double evalFormula(const string& formula, int row) {
string expr;
size_t i = 0;
while (i < formula.size()) {
if (formula[i] == '[') {
size_t j = formula.find(']', i);
string ref = formula.substr(i + 1, j - i - 1);
double v;
if (ref.find('/') != string::npos) {
vector<string> segs;
stringstream ss(ref); string seg;
while (getline(ss, seg, '/')) segs.push_back(seg);
try { v = getDW(segs[0])->getTable(segs[1])->cell(segs.back(), row); }
catch (...) { v = 0; }
} else {
v = cell(ref, row);
}
expr += to_string(v);
i = j + 1;
} else expr += formula[i++];
}
return evalArith(expr);
}
double evalArith(const string& s) {
int pos = 0;
function<double()> parseExpr, parseTerm, parseFactor;
parseFactor = [&]() {
while (pos < (int)s.size() && s[pos] == ' ') pos++;
int sign = 1;
if (pos < (int)s.size() && s[pos] == '-') { sign = -1; pos++; }
int start = pos;
while (pos < (int)s.size() && (isdigit(s[pos]) || s[pos] == '.')) pos++;
return sign * stod(s.substr(start, pos - start));
};
parseTerm = [&]() {
double x = parseFactor();
while (pos < (int)s.size()) {
while (pos < (int)s.size() && s[pos] == ' ') pos++;
if (pos < (int)s.size() && s[pos] == '*') { pos++; x *= parseFactor(); }
else if (pos < (int)s.size() && s[pos] == '/') { pos++; x /= parseFactor(); }
else break;
}
return x;
};
parseExpr = [&]() {
double x = parseTerm();
while (pos < (int)s.size()) {
while (pos < (int)s.size() && s[pos] == ' ') pos++;
if (pos < (int)s.size() && s[pos] == '+') { pos++; x += parseTerm(); }
else if (pos < (int)s.size() && s[pos] == '-') { pos++; x -= parseTerm(); }
else break;
}
return x;
};
return parseExpr();
}
};