
eval

Eval patentcity model components

Take a model (and, optionally, test data) and return a performance metrics report.

citizenship_fst(test_file, fst_file, fuzzy_match=True, verbose=False)

Evaluate the citizenship finite state transducer and return a report to stdout.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `test_file` | `str` | test file path | *required* |
| `fst_file` | `str` | fst file path | *required* |
| `fuzzy_match` | `bool` | accept/reject fuzzy match | `True` |
| `verbose` | `bool` | report verbosity | `False` |

Usage:

```shell
patentcity eval citizenship-fst data/gold_cit_uspatent01.csv lib/fst_cit.json
```
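The gold file is read with `pd.read_csv(test_file, sep=";")` and compared row by row against the FST prediction (see the source below). The following is a minimal sketch of that per-row check on made-up rows carrying the columns the command actually reads (`publication_number`, `text`, `gold`); the `toy_fst` dict and the plain substring lookup are hypothetical stand-ins for the real FST lookup and fuzzy matching.

```python
# Minimal sketch of the per-row check performed by `citizenship-fst`.
# Illustrative only: the gold rows and toy_fst are made up, and a plain
# substring lookup stands in for the real FST + fuzzy matching.
from typing import Optional

import pandas as pd

rows = [
    # same columns as the semicolon-separated gold file: publication_number;text;gold
    {"publication_number": "US-1234567-A", "text": "a citizen of France", "gold": "FR"},
    {"publication_number": "US-7654321-A", "text": "a subject of the King of Sweden", "gold": "SE"},
]
test_df = pd.DataFrame(rows)

toy_fst = {"france": "FR", "sweden": "SE"}  # hypothetical stand-in for lib/fst_cit.json


def predict(text: str) -> Optional[str]:
    """Return the first country code whose key appears in the text (no fuzzy matching)."""
    for key, code in toy_fst.items():
        if key in text.lower():
            return code
    return None


test_df["pred"] = test_df["text"].apply(predict)
accuracy = (test_df["pred"] == test_df["gold"]).mean()
print(f"Accuracy: {accuracy * 100:.2f}%")
```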

Source code in patentcity/eval.py
@app.command()
def citizenship_fst(
    test_file: str, fst_file: str, fuzzy_match: bool = True, verbose: bool = False
):
    """Evaluate citizenship finite state transducer and return report to stdout

    Arguments:
        test_file: test file path
        fst_file: fst file path
        fuzzy_match: accept/reject fuzzy match
        verbose: report verbosity

    **Usage:**
        ```shell
        patentcity eval citizenship-fst data/gold_cit_uspatent01.csv lib/fst_cit.json
        ```
    """
    fst = json.loads(open(fst_file, "r").read())
    test_df = pd.read_csv(test_file, sep=";")
    test_df = test_df.replace({np.nan: None})

    res = []
    for _, row in test_df.iterrows():
        text = row["text"]
        pred = get_cit_code(text, fst, fuzzy_match)
        res += [
            [row["publication_number"], text, row["gold"], pred, row["gold"] == pred]
        ]
    res = pd.DataFrame(
        res, columns=["publication_number", "text", "gold", "pred", "res"]
    )
    errors = res.query("res==False")

    filename = os.path.basename(test_file)

    acc = 1 - len(errors) / len(res)
    typer.secho(f"## {filename}\n", fg=typer.colors.BLUE)
    typer.echo(f"Accuracy (fuzzy-match {fuzzy_match}): {acc * 100:.2f}%\n")
    if verbose:
        typer.echo(f"### Errors\n{errors.to_markdown()}")

patentee_deduplication(test_file, verbose=False)

Evaluate patentee deduplication and return the best threshold and the associated deduplication accuracy to stdout. Note: deduplication is based on the relative Levenshtein edit distance.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `test_file` | `str` | test file path | *required* |
| `verbose` | `bool` | report verbosity | `False` |

Usage:

```shell
patentcity eval patentee-deduplication data/gold_deduplication_uspatent01.jsonl
```
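The command sweeps candidate thresholds between 0 and 2 (step 0.01) over the `lev_dist_rel` values in the gold file and keeps the threshold with the highest accuracy. The sketch below reproduces that sweep on made-up data; the distances and labels are illustrative, and the exact definition of the relative Levenshtein distance used by patentcity is assumed rather than shown here.

```python
# Minimal sketch of the threshold sweep performed by `patentee-deduplication`.
# The distances and accept/reject labels below are made up for illustration.
import numpy as np
import pandas as pd

df = pd.DataFrame(
    {
        "lev_dist_rel": [0.05, 0.40, 0.90, 1.20],  # hypothetical relative edit distances
        "clas": [1, 1, 0, 0],  # 1 = "accept" (same patentee), 0 = "reject"
    }
)

accuracy = {}
for threshold in np.arange(0, 2, 0.01):
    pred = (df["lev_dist_rel"] < threshold).astype(int)  # below threshold -> duplicate
    accuracy[threshold] = (pred == df["clas"]).mean()

accuracy = pd.Series(accuracy)
print(f"Best threshold: {accuracy.idxmax():.2f}\nAccuracy: {accuracy.max():.2f}")
```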

Source code in patentcity/eval.py
@app.command()
def patentee_deduplication(test_file: str, verbose: bool = False):
    """Evaluate patentee deduplication and return the best threshold and related deduplication accuracy to stdout.
    Note: Deduplication is based on the relative levenshtein edit distance.

    Arguments:
        test_file: test file path
        verbose: report verbosity

    **Usage:**
        ```shell
        patentcity eval patentee-deduplication data/gold_deduplication_uspatent01.jsonl
        ```
    """
    df = pd.read_json(test_file, lines=True)
    df["clas"] = df["answer"].apply(
        lambda x: 0 if x == "reject" else (1 if x == "accept" else None)
    )
    df = df.query("clas==clas").copy()
    accuracy = {}
    for threshold in np.arange(0, 2, 0.01):
        df["pred"] = df["lev_dist_rel"].apply(
            lambda x: 1 if x < threshold else 0  # pylint: disable=cell-var-from-loop
        )
        nb_true = len(df.query("clas==pred"))
        acc = nb_true / len(df)
        accuracy.update({threshold: acc})
    accuracy = pd.DataFrame.from_dict(accuracy, orient="index", columns=["accuracy"])
    if verbose:
        typer.secho("## Levenshtein distance (rel) distribution", fg=typer.colors.BLUE)
        typer.echo(
            (
                df.groupby("answer")
                .describe(percentiles=np.arange(0, 1, 0.01))["lev_dist_rel"]
                .filter(regex="%")
                .T.to_markdown()
            )
        )
    threshold_star, accuracy_star = (
        accuracy.idxmax().values[0],
        accuracy.max().values[0],
    )
    typer.secho("## Best", fg=typer.colors.BLUE)
    typer.echo(f"Best threshold: {threshold_star}\nAccuracy: {accuracy_star}")

relationship_model(test_file, rel_config, report='short')

Evaluate the relationship model and return a report to stdout.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `test_file` | `str` | test file path | *required* |
| `rel_config` | `str` | relationship resolution config file path | *required* |
| `report` | `str` | size and format of the performance report (one of "short", "long", "json") | `'short'` |

Usage:

```shell
patentcity eval relationship-model gold_rel_uspatent01.jsonl configs/rel_uspatent01.yaml
```
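The source below reads one entry per child label (`LOC`, `OCC`, `CIT`) from the config, each with `max_length`, `position` and `max_n` keys. The sketch builds a config with that shape using hypothetical placeholder values; the real `configs/rel_uspatent01.yaml` may use different values or carry additional keys.

```python
# Minimal sketch of a relationship-resolution config with the keys read by
# `relationship-model` (LOC/OCC/CIT each need max_length, position and max_n).
# The values are hypothetical placeholders, not the ones shipped with patentcity.
import yaml

rel_config = {
    label: {"max_length": 100, "position": "RIGHT", "max_n": 1}
    for label in ["LOC", "OCC", "CIT"]
}

print(yaml.safe_dump(rel_config))
```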

Source code in patentcity/eval.py
@app.command()
def relationship_model(
    test_file: str, rel_config: str, report: str = "short"
):  # pylint: disable=too-many-statements
    """
    Evaluate relationship model and return report to stdout

    Arguments:
        test_file: test file path
        rel_config: relationship resolution config file path
        report: size and format of the performance report (in "short", "long", "json")

    **Usage:**
        ```shell
        patentcity eval relationship-model gold_rel_uspatent01.jsonl configs/rel_uspatent01.yaml
        ```
    """
    # TODO Transition to EntityRelationshipComponent
    assert report in ["short", "long", "json"]

    def eval_performance(pred, gold, label: str = None):
        def get_rel(relations, label):
            rel = [
                {
                    "head": [
                        rel["head_span"]["token_start"],
                        rel["head_span"]["token_end"],
                    ],
                    "child": [
                        rel["child_span"]["token_start"],
                        rel["child_span"]["token_end"],
                    ],
                    "label": rel["label"],
                }
                for rel in relations
            ]
            if label:
                rel = [rel_ for rel_ in rel if rel_["label"] == label]
            return rel

        rel_pred = get_rel(pred, label)
        rel_gold = get_rel(gold, label)

        true = list(rel_gold)
        true_positives = [rel for rel in rel_pred if rel in rel_gold]
        false_positives = [rel for rel in rel_pred if rel not in rel_gold]
        false_negatives = [rel for rel in rel_gold if rel not in rel_pred]
        return true, true_positives, false_positives, false_negatives

    def report_errors(errors):
        # an error is expressed as a rel with tokens
        # {"head": [head_start, head_end], "child": [child_start, child_end], "label": label,
        # "tokens": list}

        def report_error(error):
            tokens = error["tokens"]

            def get_text(tokens, boundaries):
                text = tokens[boundaries[0] : boundaries[1] + 1]
                text = " ".join(text).replace("\n", "")
                return text

            start = min(error["head"][0], error["child"][0])
            end = max(error["head"][1], error["child"][1])
            error_rel = f"""{get_text(tokens, error['head'])}({error['head']})->-{error['label']}->-{get_text(tokens, error['child'])}({error['child']})""".replace(
                "\n", ""
            )
            error_context = f"""{get_text(tokens, [start, end])}"""

            return error_rel, error_context

        data = []
        for error in errors:
            data += [report_error(error)]
        typer.echo(
            pd.DataFrame(columns=["error_rel", "error_context"], data=data).to_markdown(
                index=False
            )
        )

    def get_relation(head, child):
        relation = []
        if child:
            for child_ in child:
                # nb: in some configs (max_n >1), there might be more than 1 child
                # here generate something in the flavor of eg["relations"] for eval
                relation += [
                    {
                        "child": child_["token_end"],
                        "child_span": child_,
                        "head": head["token_end"],
                        "head_span": head,
                        "label": RELATIONS[child_["label"]],
                    }
                ]
        return relation

    def get_report(truth_categories, report):
        def filter_relation(label, *args):
            assert label in list(RELATIONS.values())
            for l in args:
                yield [e for e in l if e["label"] == label]

        def get_metrics(
            true, true_positives, false_positives, false_negatives, label=None
        ):
            if label:
                (
                    true,
                    true_positives,
                    false_positives,
                    false_negatives,
                ) = filter_relation(
                    label, true, true_positives, false_positives, false_negatives
                )
            # nb_t = len(true)
            nb_tp = len(true_positives)
            nb_fp = len(false_positives)
            nb_fn = len(false_negatives)
            try:
                prec = nb_tp / (nb_tp + nb_fp)
                rec = nb_tp / (nb_tp + nb_fn)
                f1 = 2 * prec * rec / (prec + rec)
            except ZeroDivisionError:
                rec = prec = f1 = None

            return prec, rec, f1

        true, true_positives, false_positives, false_negatives = truth_categories

        res = {}
        for label in [None] + list(RELATIONS.values()):
            prec, rec, f1 = get_metrics(
                true, true_positives, false_positives, false_negatives, label
            )
            label = label if label else "ALL"
            if all([prec, rec, f1]):
                res.update(
                    {
                        label: {
                            "p": round(prec, 3),
                            "r": round(rec, 3),
                            "f": round(f1, 3),
                        }
                    }
                )
            else:
                res.update({label: {"p": None, "r": None, "f": None}})

        if report == "json":
            typer.echo(json.dumps(res))
        else:
            typer.secho("\n# Report", fg=typer.colors.BLUE)
            typer.echo(f"Config file: {rel_config}")
            typer.secho("\n## Performance", fg=typer.colors.BLUE)
            typer.echo(pd.DataFrame.from_dict(res).to_markdown())
            if report == "long":
                typer.secho("\n## False positives", fg=typer.colors.BLUE)
                report_errors(sorted(false_positives, key=lambda d: d["label"]))
                typer.secho("\n## False negatives", fg=typer.colors.BLUE)
                report_errors(sorted(false_negatives, key=lambda d: d["label"]))

    true, true_positives, false_positives, false_negatives = [], [], [], []
    with open(rel_config, "r") as config_file:
        cfg = yaml.load(config_file, Loader=yaml.FullLoader)
    with open(test_file, "r") as lines:
        for line in lines:
            eg = json.loads(line)
            ents = eg["spans"]
            relations_gold = eg["relations"]

            heads = [ent for ent in ents if ent["label"] in ["ASG", "INV"]]
            children = [ent for ent in ents if ent["label"] not in ["ASG", "INV"]]

            relations_pred = []
            for head in heads:
                for label in ["LOC", "OCC", "CIT"]:
                    cfg_ = cfg[label]
                    child = get_child(
                        head,
                        children,
                        label,
                        cfg_["max_length"],
                        cfg_["position"],
                        cfg_["max_n"],
                    )

                    relations_pred += get_relation(head, child)

            (
                true_,
                true_positives_,
                false_positives_,
                false_negatives_,
            ) = eval_performance(relations_pred, relations_gold)

            true += true_
            true_positives += true_positives_
            false_positives += [
                {**fp, **{"tokens": [tok["text"] for tok in eg["tokens"]]}}
                for fp in false_positives_
            ]
            false_negatives += [
                {**fn, **{"tokens": [tok["text"] for tok in eg["tokens"]]}}
                for fn in false_negatives_
            ]
    get_report((true, true_positives, false_positives, false_negatives), report)
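With `report="json"`, `get_report` above serialises one `{p, r, f}` entry per relation label plus an `ALL` aggregate. The snippet below is an illustrative sketch of that output shape only: the scores are made up, and the per-label keys come from the module's `RELATIONS` mapping, which is not shown on this page.

```python
# Illustrative shape of the `--report json` output (scores and the "LOCATION"
# label name are made up; actual labels come from the RELATIONS mapping).
import json

report = json.loads(
    '{"ALL": {"p": 0.95, "r": 0.93, "f": 0.94}, '
    '"LOCATION": {"p": 0.96, "r": 0.94, "f": 0.95}}'
)
for label, scores in report.items():
    print(label, scores["p"], scores["r"], scores["f"])
```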

spacy_model(model, components='ner')

Evaluate spaCy model components and return a report to stdout. Notes: i) only the "ner" component is supported so far; ii) results are reported from the runtime evaluation.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `model` | `str` | model path | *required* |
| `components` | `str` | spaCy model components (comma separated) | `'ner'` |

Usage:

```shell
patentcity eval spacy-model models/en_ent_uspatent01
```
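Scores are not recomputed at evaluation time: the command reads the `performance` block of the model's `meta.json` (see the source below). The sketch shows a minimal block with hypothetical numbers and how the reported table is assembled from it; only the `INV` label is shown for brevity.

```python
# Hedged sketch of the meta.json "performance" block read by `spacy-model`
# and of the per-type table built from it (numbers are illustrative only).
import pandas as pd

performance = {
    "ents_p": 0.97, "ents_r": 0.96, "ents_f": 0.96,
    "ents_per_type": {"INV": {"p": 0.98, "r": 0.97, "f": 0.97}},
}

perfs = pd.DataFrame.from_dict(performance["ents_per_type"])
perfs["ALL"] = (performance["ents_p"], performance["ents_r"], performance["ents_f"])
print(perfs.round(2).to_markdown())
```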

Source code in patentcity/eval.py
@app.command()
def spacy_model(model: str, components: str = "ner"):
    """Evaluate spaCy model `components` and return report to stdout. Notes: i) only "ner" component is supported so far ii) report results from runtime eval

    Arguments:
        model: model path
        components: spaCy model components (comma separated)

    **Usage:**
        ```shell
        patentcity eval spacy-model models/en_ent_uspatent01
        ```

    """

    scores = json.loads(open(os.path.join(model, "meta.json"), "r").read())[
        "performance"
    ]

    components = components.split(",")
    if "ner" in components:
        p, r, f = scores["ents_p"], scores["ents_r"], scores["ents_f"]
        typer.secho("NER Scores", fg=typer.colors.BLUE)
        perfs = pd.DataFrame.from_dict(scores["ents_per_type"])
        perfs["ALL"] = (p, r, f)
        perfs = perfs.round(2)
        perfs = perfs[sorted(perfs.columns)]
        typer.echo(f"{perfs.to_markdown()}")