登录
OAmaster

In this assessment, you will implement a simplified version of our core product. We provide a few starter classes (Column, DataWarehouseTable, DataWarehouse), and your task is to implement SigmaTable.

Background

Sigma is a user interface that sits on top of various cloud data warehouses. We provide a unified way to interact with your data, despite differences in warehouse implementations. Data warehouses contain tables, usually similar to SQL tables.

A SigmaTable has rows and columns, in which you can create columns that are functions of other columns (e.g. [My Sum Col] : [Data 1] + [Data 2]). We also have the ability to add summaries to our tables, which are aggregations (like Min or Max) of all values in a column.

Implementation details (must follow)

  • For security reasons, do not store any warehouse data other than the get_data_warehouse method in the SigmaTable class. Do not store data computed from warehouse data in the SigmaTable either.
  • Do not store additional data in the data warehouse.
  • Trying to use a missing column should raise a ValueError.
  • Missing data should be treated as 0.
  • You can store whatever metadata you want in the SigmaTable.
  • In cases of ambiguity, make reasonable choices.

Example

A SigmaTable named My Cool Data with Col A = [1,2,3], Col B = [4,5,6], computed A + B = [5, 7, 9]. Summary widgets show Max of Col A = 3, Min of A + B = 5. Formulas reference columns by [Col A] or by [My Cool Warehouse/Col A] (full path).

解法

只存列元数据(名字、公式),不缓存行数据。get_value(col, row) 时用 [Name] / [Warehouse/Table/Col] 正则解析公式,递归解引用(本表计算列、同库其他表列、warehouse 取数)。列不存在抛 ValueError;数据缺失返回 0。汇总函数遍历 row in 0..n-1 累计。单次访问复杂度 O(formula_depth × refs)

import re
from typing import Callable, Optional

class Column:
    def __init__(self, name: str, formula: Optional[str] = None):
        self.name = name
        self.formula = formula

class SigmaTable:
    REF = re.compile(r'\[([^\]]+)\]')

    def __init__(self, name: str, warehouse_table_path: str, get_data_warehouse: Callable):
        self.name = name
        self.warehouse_table_path = warehouse_table_path
        self.get_data_warehouse = get_data_warehouse
        self.columns: dict[str, Column] = {}
        self.summaries: list[tuple[str, str]] = []

    def add_data_column(self, name: str):
        self.columns[name] = Column(name)

    def add_formula_column(self, name: str, formula: str):
        self.columns[name] = Column(name, formula)

    def add_summary(self, kind: str, column: str):
        if column not in self.columns:
            raise ValueError(f"Unknown column: {column}")
        self.summaries.append((kind, column))

    def num_rows(self) -> int:
        wh, table = self.warehouse_table_path.split('/', 1)
        return self.get_data_warehouse(wh).get_table(table).num_rows()

    def cell(self, col_name: str, row: int):
        if col_name not in self.columns:
            raise ValueError(f"Unknown column: {col_name}")
        col = self.columns[col_name]
        if col.formula is None:
            wh, table = self.warehouse_table_path.split('/', 1)
            try:
                return self.get_data_warehouse(wh).get_table(table).cell(col_name, row)
            except (KeyError, IndexError):
                return 0
        return self._eval_formula(col.formula, row)

    def _eval_formula(self, formula: str, row: int):
        def repl(m):
            ref = m.group(1)
            if '/' in ref:
                wh, rest = ref.split('/', 1)
                table, col_name = rest.rsplit('/', 1) if '/' in rest else (rest, rest)
                try:
                    return str(self.get_data_warehouse(wh).get_table(table).cell(col_name, row))
                except (KeyError, IndexError):
                    return '0'
            return str(self.cell(ref, row))
        expr = self.REF.sub(repl, formula)
        try:
            return eval(expr, {'__builtins__': {}}, {})
        except Exception:
            return 0

    def column_values(self, col_name: str):
        n = self.num_rows()
        return [self.cell(col_name, r) for r in range(n)]

    def summary_value(self, kind: str, col_name: str):
        vals = self.column_values(col_name)
        if not vals:
            return 0
        if kind == 'Max': return max(vals)
        if kind == 'Min': return min(vals)
        if kind == 'Sum': return sum(vals)
        if kind == 'Avg': return sum(vals) / len(vals)
        raise ValueError(f"Unknown summary: {kind}")
import java.util.*;
import java.util.function.Function;
import java.util.regex.*;

class Column {
    String name;
    String formula;
    Column(String name, String formula) { this.name = name; this.formula = formula; }
}

public class SigmaTable {
    private static final Pattern REF = Pattern.compile("\\[([^\\]]+)\\]");

    private final String name;
    private final String warehouseTablePath;
    private final Function<String, DataWarehouse> getDataWarehouse;
    private final Map<String, Column> columns = new LinkedHashMap<>();
    private final List<String[]> summaries = new ArrayList<>();

    public SigmaTable(String name, String path, Function<String, DataWarehouse> getDW) {
        this.name = name;
        this.warehouseTablePath = path;
        this.getDataWarehouse = getDW;
    }

    public void addDataColumn(String name) { columns.put(name, new Column(name, null)); }
    public void addFormulaColumn(String name, String formula) { columns.put(name, new Column(name, formula)); }

    public void addSummary(String kind, String col) {
        if (!columns.containsKey(col)) throw new IllegalArgumentException("Unknown column: " + col);
        summaries.add(new String[]{kind, col});
    }

    public int numRows() {
        String[] parts = warehouseTablePath.split("/", 2);
        return getDataWarehouse.apply(parts[0]).getTable(parts[1]).numRows();
    }

    public Object cell(String colName, int row) {
        if (!columns.containsKey(colName)) throw new IllegalArgumentException("Unknown column: " + colName);
        Column col = columns.get(colName);
        if (col.formula == null) {
            String[] parts = warehouseTablePath.split("/", 2);
            try {
                return getDataWarehouse.apply(parts[0]).getTable(parts[1]).cell(colName, row);
            } catch (Exception e) { return 0; }
        }
        return evalFormula(col.formula, row);
    }

    private Object evalFormula(String formula, int row) {
        Matcher m = REF.matcher(formula);
        StringBuffer sb = new StringBuffer();
        while (m.find()) {
            String ref = m.group(1);
            Object v;
            if (ref.contains("/")) {
                String[] segs = ref.split("/");
                try {
                    v = getDataWarehouse.apply(segs[0]).getTable(segs[1]).cell(segs[segs.length - 1], row);
                } catch (Exception e) { v = 0; }
            } else {
                v = cell(ref, row);
            }
            m.appendReplacement(sb, v.toString());
        }
        m.appendTail(sb);
        return evalArithmetic(sb.toString());
    }

    private double evalArithmetic(String expr) {
        return new Object() {
            int pos = -1, ch;
            void next() { ch = (++pos < expr.length()) ? expr.charAt(pos) : -1; }
            boolean eat(int c) { while (ch == ' ') next(); if (ch == c) { next(); return true; } return false; }
            double parse() { next(); return expression(); }
            double expression() {
                double x = term();
                for (;;) {
                    if (eat('+')) x += term();
                    else if (eat('-')) x -= term();
                    else return x;
                }
            }
            double term() {
                double x = factor();
                for (;;) {
                    if (eat('*')) x *= factor();
                    else if (eat('/')) x /= factor();
                    else return x;
                }
            }
            double factor() {
                if (eat('+')) return factor();
                if (eat('-')) return -factor();
                int start = pos;
                while ((ch >= '0' && ch <= '9') || ch == '.') next();
                return Double.parseDouble(expr.substring(start, pos));
            }
        }.parse();
    }
}

interface DataWarehouse { DataWarehouseTable getTable(String t); }
interface DataWarehouseTable { int numRows(); Object cell(String c, int r); }
#include <bits/stdc++.h>
using namespace std;

struct Column { string name, formula; bool hasFormula; };

class DataWarehouseTable {
public:
    virtual int numRows() = 0;
    virtual double cell(const string& c, int r) = 0;
};
class DataWarehouse {
public:
    virtual DataWarehouseTable* getTable(const string& t) = 0;
};

class SigmaTable {
    string name, warehousePath;
    function<DataWarehouse*(const string&)> getDW;
    map<string, Column> columns;
    vector<pair<string, string>> summaries;

public:
    SigmaTable(string n, string p, function<DataWarehouse*(const string&)> g)
        : name(n), warehousePath(p), getDW(g) {}

    void addDataColumn(const string& n) { columns[n] = {n, "", false}; }
    void addFormulaColumn(const string& n, const string& f) { columns[n] = {n, f, true}; }

    void addSummary(const string& kind, const string& col) {
        if (!columns.count(col)) throw invalid_argument("Unknown column: " + col);
        summaries.push_back({kind, col});
    }

    int numRows() {
        auto pos = warehousePath.find('/');
        return getDW(warehousePath.substr(0, pos))->getTable(warehousePath.substr(pos + 1))->numRows();
    }

    double cell(const string& colName, int row) {
        if (!columns.count(colName)) throw invalid_argument("Unknown column: " + colName);
        auto& col = columns[colName];
        if (!col.hasFormula) {
            auto pos = warehousePath.find('/');
            try { return getDW(warehousePath.substr(0, pos))->getTable(warehousePath.substr(pos + 1))->cell(colName, row); }
            catch (...) { return 0; }
        }
        return evalFormula(col.formula, row);
    }

private:
    double evalFormula(const string& formula, int row) {
        string expr;
        size_t i = 0;
        while (i < formula.size()) {
            if (formula[i] == '[') {
                size_t j = formula.find(']', i);
                string ref = formula.substr(i + 1, j - i - 1);
                double v;
                if (ref.find('/') != string::npos) {
                    vector<string> segs;
                    stringstream ss(ref); string seg;
                    while (getline(ss, seg, '/')) segs.push_back(seg);
                    try { v = getDW(segs[0])->getTable(segs[1])->cell(segs.back(), row); }
                    catch (...) { v = 0; }
                } else {
                    v = cell(ref, row);
                }
                expr += to_string(v);
                i = j + 1;
            } else expr += formula[i++];
        }
        return evalArith(expr);
    }

    double evalArith(const string& s) {
        int pos = 0;
        function<double()> parseExpr, parseTerm, parseFactor;
        parseFactor = [&]() {
            while (pos < (int)s.size() && s[pos] == ' ') pos++;
            int sign = 1;
            if (pos < (int)s.size() && s[pos] == '-') { sign = -1; pos++; }
            int start = pos;
            while (pos < (int)s.size() && (isdigit(s[pos]) || s[pos] == '.')) pos++;
            return sign * stod(s.substr(start, pos - start));
        };
        parseTerm = [&]() {
            double x = parseFactor();
            while (pos < (int)s.size()) {
                while (pos < (int)s.size() && s[pos] == ' ') pos++;
                if (pos < (int)s.size() && s[pos] == '*') { pos++; x *= parseFactor(); }
                else if (pos < (int)s.size() && s[pos] == '/') { pos++; x /= parseFactor(); }
                else break;
            }
            return x;
        };
        parseExpr = [&]() {
            double x = parseTerm();
            while (pos < (int)s.size()) {
                while (pos < (int)s.size() && s[pos] == ' ') pos++;
                if (pos < (int)s.size() && s[pos] == '+') { pos++; x += parseTerm(); }
                else if (pos < (int)s.size() && s[pos] == '-') { pos++; x -= parseTerm(); }
                else break;
            }
            return x;
        };
        return parseExpr();
    }
};