 "cells": [
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "import clip\n",
    "from evaluation_utils import norm, denorm\n",
    "from general_utils import *\n",
    "from datasets.lvis_oneshot3 import LVIS_OneShot3, LVIS_OneShot"
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# PhraseCut"
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gather the PhraseCut runs selected by nums=':6' and tabulate their results.\n",
    "pc_runs = experiment('experiments/phrasecut.yaml', nums=':6')\n",
    "pc = pc_runs.dataframe()"
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run name together with the `*_best` IoU columns and AP.\n",
    "best_cols = ['name', 'pc_miou_best', 'pc_fgiou_best', 'pc_ap']\n",
    "tab1 = pc[best_cols]"
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# LaTeX rows of the PhraseCut table for threshold 0.3.\n",
    "cols = ['pc_miou_0.3',  'pc_fgiou_0.3', 'pc_ap']\n",
    "# Work on an explicit copy: assigning through .loc into a bare column selection of\n",
    "# `pc` raises SettingWithCopyWarning and leaves it unclear which frame is modified.\n",
    "tab1 = pc[['name'] + cols].copy()\n",
    "for k in cols:\n",
    "    # scale by 100 and keep one decimal\n",
    "    tab1.loc[:, k] = (100 * tab1.loc[:, k]).round(1)\n",
    "# Replace the run names by the table labels (one per row, in row order).\n",
    "tab1.loc[:, 'name'] = ['CLIPSeg (PC+)', 'CLIPSeg (PC, $D=128$)', 'CLIPSeg (PC)', 'CLIP-Deconv', 'ViTSeg (PC+)', 'ViTSeg (PC)']\n",
    "# Threshold column directly after the name column.\n",
    "tab1.insert(1, 't', [0.3]*tab1.shape[0])\n",
    "print(tab1.to_latex(header=False, index=False))"
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Same table as above, evaluated at a threshold of 0.1."
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# LaTeX rows of the PhraseCut table for threshold 0.1.\n",
    "cols = ['pc_miou_0.1',  'pc_fgiou_0.1', 'pc_ap']\n",
    "# Work on an explicit copy: assigning through .loc into a bare column selection of\n",
    "# `pc` raises SettingWithCopyWarning and leaves it unclear which frame is modified.\n",
    "tab1 = pc[['name'] + cols].copy()\n",
    "for k in cols:\n",
    "    # scale by 100 and keep one decimal\n",
    "    tab1.loc[:, k] = (100 * tab1.loc[:, k]).round(1)\n",
    "# Replace the run names by the table labels (one per row, in row order).\n",
    "tab1.loc[:, 'name'] = ['CLIPSeg (PC+)', 'CLIPSeg (PC, $D=128$)', 'CLIPSeg (PC)', 'CLIP-Deconv', 'ViTSeg (PC+)', 'ViTSeg (PC)']\n",
    "# Threshold column directly after the name column.\n",
    "tab1.insert(1, 't', [0.1]*tab1.shape[0])\n",
    "print(tab1.to_latex(header=False, index=False))"
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# One-shot"
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Pascal"
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gather the Pascal one-shot runs selected by nums=':19' and tabulate their results.\n",
    "pas_runs = experiment('experiments/pascal_1shot.yaml', nums=':19')\n",
    "pas = pas_runs.dataframe()"
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inspect selected `pas_h2_*` columns next to the run names.\n",
    "overview_cols = ['name', 'pas_h2_miou_0.3', 'pas_h2_biniou_0.3', 'pas_h2_ap', 'pas_h2_fgiou_ct']\n",
    "pas[overview_cols]"
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Each entry: run selection, metric columns, and (row label, positional row slice) pairs.\n",
    "# Every printed row is the column-wise mean over its slice, scaled by 100.\n",
    "# NOTE(review): the ViTSeg entry reads `pas_t_*` columns while the other entries in this\n",
    "# cell read `pas_h2_*` -- confirm this is intended.\n",
    "pascal_oneshot_rows = [\n",
    "    (':8', ['pas_h2_miou_0.3', 'pas_h2_biniou_0.3', 'pas_h2_ap'],\n",
    "     [('CLIPSeg (PC+) & 0.3 & CLIP & ', slice(0, 4)), ('CLIPSeg (PC)  & 0.3 & CLIP & ', slice(4, 8))]),\n",
    "    ('12:16', ['pas_h2_miou_0.2', 'pas_h2_biniou_0.2', 'pas_h2_ap'],\n",
    "     [('CLIP-Deconv (PC+) & 0.2 & CLIP & ', slice(0, 4))]),\n",
    "    ('16:20', ['pas_t_miou_0.2', 'pas_t_biniou_0.2', 'pas_t_ap'],\n",
    "     [('ViTSeg (PC+) & 0.2 & CLIP & ', slice(0, 4))]),\n",
    "]\n",
    "for run_range, metric_cols, labelled_slices in pascal_oneshot_rows:\n",
    "    pas = experiment('experiments/pascal_1shot.yaml', nums=run_range).dataframe()\n",
    "    tab1 = pas[metric_cols]\n",
    "    for row_label, row_slice in labelled_slices:\n",
    "        averaged = tab1[row_slice].mean(0).values\n",
    "        print(row_label + ' & '.join(f'{x*100:.1f}' for x in averaged), '\\\\\\\\')"
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Pascal Zero-shot (in one-shot setting)\n",
    "Using the same setting as one-shot (hence different from the other zero-shot benchmark)"
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Each entry: run selection, `pas_t_*` metric columns, and (row label, positional row slice) pairs.\n",
    "# Every printed row is the column-wise mean over its slice, scaled by 100.\n",
    "pascal_zeroshot_rows = [\n",
    "    (':8', ['pas_t_miou_0.3', 'pas_t_biniou_0.3', 'pas_t_ap'],\n",
    "     [('CLIPSeg (PC+) & 0.3 & CLIP & ', slice(0, 4)), ('CLIPSeg (PC) & 0.3 & CLIP & ', slice(4, 8))]),\n",
    "    ('12:16', ['pas_t_miou_0.3', 'pas_t_biniou_0.3', 'pas_t_ap'],\n",
    "     [('CLIP-Deconv (PC+) & 0.3 & CLIP & ', slice(0, 4))]),\n",
    "    ('16:20', ['pas_t_miou_0.2', 'pas_t_biniou_0.2', 'pas_t_ap'],\n",
    "     [('ViTSeg (PC+) & 0.2 & CLIP & ', slice(0, 4))]),\n",
    "]\n",
    "for run_range, metric_cols, labelled_slices in pascal_zeroshot_rows:\n",
    "    pas = experiment('experiments/pascal_1shot.yaml', nums=run_range).dataframe()\n",
    "    tab1 = pas[metric_cols]\n",
    "    for row_label, row_slice in labelled_slices:\n",
    "        averaged = tab1[row_slice].mean(0).values\n",
    "        print(row_label + ' & '.join(f'{x*100:.1f}' for x in averaged), '\\\\\\\\')"
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# without fixed thresholds...\n",
    "# Same row layout as the fixed-threshold tables, but reading the `pas_t_best_*` columns.\n",
    "best_metric_cols = ['pas_t_best_miou', 'pas_t_best_biniou', 'pas_t_ap']\n",
    "pascal_best_rows = [\n",
    "    (':8', [('CLIPSeg (PC+) & CLIP & ', slice(0, 4)), ('CLIPSeg (PC) & CLIP & ', slice(4, 8))]),\n",
    "    ('12:16', [('CLIP-Deconv (PC+) & CLIP & ', slice(0, 4))]),\n",
    "]\n",
    "for run_range, labelled_slices in pascal_best_rows:\n",
    "    pas = experiment('experiments/pascal_1shot.yaml', nums=run_range).dataframe()\n",
    "    tab1 = pas[best_metric_cols]\n",
    "    for row_label, row_slice in labelled_slices:\n",
    "        averaged = tab1[row_slice].mean(0).values\n",
    "        print(row_label + ' & '.join(f'{x*100:.1f}' for x in averaged), '\\\\\\\\')"
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### COCO"
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gather the COCO runs selected by nums=':29' and tabulate their results.\n",
    "coco_runs = experiment('experiments/coco.yaml', nums=':29')\n",
    "coco = coco_runs.dataframe()"
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Three column selections of the COCO results; only tab1 is printed in this cell.\n",
    "tab1 = coco[['coco_h2_miou_0.1', 'coco_h2_biniou_0.1', 'coco_h2_ap']]\n",
    "tab2 = coco[['coco_h2_miou_0.2', 'coco_h2_biniou_0.2', 'coco_h2_ap']]\n",
    "tab3 = coco[['coco_h2_miou_best', 'coco_h2_biniou_best', 'coco_h2_ap']]\n",
    "# (row label, positional row slice); each printed row is the column-wise mean over\n",
    "# its slice, scaled by 100.\n",
    "coco_rows = [\n",
    "    ('CLIPSeg (COCO) & 0.1 & CLIP &  ', slice(None, 4)),\n",
    "    ('CLIPSeg (COCO+N)  & 0.1 & CLIP &  ', slice(4, 8)),\n",
    "    ('CLIP-Deconv (COCO+N)  & 0.1 & CLIP &  ', slice(12, 16)),\n",
    "    ('ViTSeg (COCO)  & 0.1 & CLIP &  ', slice(8, 12)),\n",
    "]\n",
    "for row_label, row_slice in coco_rows:\n",
    "    averaged = tab1[row_slice].mean(0).values\n",
    "    print(row_label + ' & '.join(f'{x*100:.1f}' for x in averaged), '\\\\\\\\')"
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Zero-shot"
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gather the Pascal zero-shot runs selected by nums=':11' and tabulate their results.\n",
    "zs_runs = experiment('experiments/pascal_0shot.yaml', nums=':11')\n",
    "zs = zs_runs.dataframe()"
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tab1 = zs[['pas_zs_seen', 'pas_zs_unseen']]\n",
    "# (row label, first row position, second row position); each printed row lists the\n",
    "# seen/unseen values of the first row followed by those of the second, scaled by 100.\n",
    "zero_shot_rows = [\n",
    "    ('CLIPSeg (PC+) & CLIP &  ', 8, 10),\n",
    "    ('CLIP-Deconv & CLIP &  ', 2, 3),\n",
    "    ('ViTSeg & ImageNet-1K &  ', 4, 5),\n",
    "]\n",
    "for row_label, first_pos, second_pos in zero_shot_rows:\n",
    "    scores = tab1[first_pos:first_pos + 1].values[0].tolist() + tab1[second_pos:second_pos + 1].values[0].tolist()\n",
    "    print(row_label + ' & '.join(f'{x*100:.1f}' for x in scores), '\\\\\\\\')"
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ablation"
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gather the ablation runs selected by nums=':8' and tabulate their results.\n",
    "ablation_runs = experiment('experiments/ablation.yaml', nums=':8')\n",
    "ablation = ablation_runs.dataframe()"
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ablation table: run name plus the `pc_*` and `pc-vis_*` metric columns.\n",
    "ablation_cols = ['pc_miou_best', 'pc_ap', 'pc-vis_miou_best', 'pc-vis_ap']\n",
    "# Work on an explicit copy: assigning through .loc into a bare column selection of\n",
    "# `ablation` raises SettingWithCopyWarning and leaves it unclear which frame is modified.\n",
    "tab1 = ablation[['name'] + ablation_cols].copy()\n",
    "for k in ablation_cols:\n",
    "    # scale by 100 and keep one decimal\n",
    "    tab1.loc[:, k] = (100 * tab1.loc[:, k]).round(1)\n",
    "# Replace the run names by the table labels (one per row, in row order).\n",
    "tab1.loc[:, 'name'] = ['CLIPSeg', 'no CLIP pre-training', 'no-negatives', '50% negatives', 'no visual', '$D=16$', 'only layer 3', 'highlight mask']"
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print only the rows labelled 0, 1, 4, 5, 6 and 7 (labels 2 and 3 are left out).\n",
    "kept_rows = [0, 1, 4, 5, 6, 7]\n",
    "print(tab1.loc[kept_rows, :].to_latex(header=False, index=False))"
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): prints the same row selection as the preceding cell -- confirm whether\n",
    "# a different selection was intended here.\n",
    "kept_rows = [0, 1, 4, 5, 6, 7]\n",
    "print(tab1.loc[kept_rows, :].to_latex(header=False, index=False))"
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Generalization"
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gather all runs of the generalization experiment file and tabulate their results.\n",
    "generalization_runs = experiment('experiments/generalize.yaml')\n",
    "generalization = generalization_runs.dataframe()"
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Metric columns in the order they are printed; `gen` holds them as an array with one row per run.\n",
    "gen_cols = ['aff_best_fgiou', 'aff_ap', 'ability_best_fgiou', 'ability_ap', 'part_best_fgiou', 'part_ap']\n",
    "gen = generalization[gen_cols].values"
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "    'CLIPSeg (PC+) & ' + ' & '.join(f'{x*100:.1f}' for x in gen[1]) + ' \\\\\\\\ \\n' + \\\n",
    "    'CLIPSeg (LVIS)  & ' + ' & '.join(f'{x*100:.1f}' for x in gen[0]) + ' \\\\\\\\ \\n' + \\\n",
    "    'CLIP-Deconv & ' + ' & '.join(f'{x*100:.1f}' for x in gen[2]) + ' \\\\\\\\ \\n' + \\\n",
    "    'VITSeg & ' + ' & '.join(f'{x*100:.1f}' for x in gen[3]) + ' \\\\\\\\'\n",
 "metadata": {
  "interpreter": {
   "hash": "800ed241f7db2bd3aa6942aa3be6809cdb30ee6b0a9e773dfecfa9fef1f4c586"
  "kernelspec": {
   "display_name": "env2",
   "language": "python",
   "name": "env2"
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
 "nbformat": 4,
 "nbformat_minor": 4