#!/home/conda/feedstock_root/build_artifacts/bld/rattler-build_igwn-ligolw_1771054293/host_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl/bin/python
#
# Copyright (C) 2006--2010,2012,2014,2016,2017  Kipp Cannon
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.


#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#


"""
Print things from LIGO LW XML files.  Inspired by lwtprint from LIGOTools.
"""

from optparse import OptionParser

from igwn_segments import utils as segmentsUtils
from lal.utils.cache import CacheEntry

from igwn_ligolw import __version__, ligolw
from igwn_ligolw import types as ligolw_types
from igwn_ligolw import utils as ligolw_utils

__author__ = "Kipp Cannon <kipp@g.ecc.u-tokyo.ac.jp>"


#
# =============================================================================
#
#                                 Command Line
#
# =============================================================================
#


def parse_command_line():
    """
    Parse the command line and post-process the option values.

    Returns a two-element tuple (options, urls).

    options is the optparse values object, with three attributes
    rewritten after parsing:

    - options.table:  if any --table was given, the names are passed
      through ligolw.Table.TableName and stored as a list.
    - options.array:  if any --array was given, the names are passed
      through ligolw.Array.ArrayName and stored as a list.
    - options.rows:  the rowspec string is split on "," and replaced by
      the object returned by segmentsUtils.from_range_strings().

    options.column is left as None when no --column option was given.

    urls is the list of positional arguments followed by the URLs taken
    from every --input-cache file, in command-line order.  If that list
    is empty, [None] is returned in its place so that the caller's loop
    over the URLs still runs exactly once.

    Raises ValueError if the rowspec cannot be parsed.
    """
    parser = OptionParser(
        version="Name: %%prog\n%s" % __version__,
        usage="%prog [options] [url ...]",
        description="Prints the contents of table elements from one or more LIGO Light Weight XML files to stdout in delimited ASCII format.  In addition to regular files, the program can read from many common URLs such as http:// and ftp://.  Gzipped files will be automatically detected and decompressed.  If no filenames or URLs are given, then input is read from stdin.",
    )
    parser.add_option(
        "-i",
        "--input-cache",
        metavar="name",
        action="append",
        default=[],
        help="Get URLs from the LAL cache file.  Can be provided multiple times to name several caches to iterate over.",
    )
    parser.add_option(
        "-c",
        "--column",
        metavar="name",
        action="append",
        help="Print only the contents of the given column.  Can be provided multiple times to print multiple columns.  The default is to print all columns from each table.",
    )
    parser.add_option(
        "-d",
        "--delimiter",
        metavar="string",
        default=",",
        help='Delimit output with the given string.  The default is ",".',
    )
    parser.add_option(
        "-r",
        "--rows",
        metavar="rowspec",
        default=":",
        help='Print rows in the given range(s).  The format is first:last[,first:last,...].  Rows are numbered from 0.  A single first:last pair requests rows in the range [first, last).  If the first or last value of a pair is omited it means 0 or infinity respectively.  The default is ":", or to print all rows.',
    )
    parser.add_option(
        "-t",
        "--table",
        metavar="name",
        action="append",
        default=[],
        help="Print rows from this table.  Can be provided multiple times to print rows from multiple tables.  The default is to print the contents of all tables.",
    )
    parser.add_option(
        "-a",
        "--array",
        metavar="name",
        action="append",
        default=[],
        help="Print the contents of this array.  Can be provided multiple times to print the elements from multiple arrays.  The default is to print the contents of all arrays.",
    )
    parser.add_option("-v", "--verbose", action="store_true", help="Be verbose.")
    parser.add_option(
        "--constrain-lsc-tables",
        action="store_true",
        help="Impose additional constraints on official LSC tables.  Provides format validation and allows RAM requirements to be reduced.",
    )
    options, urls = parser.parse_args()

    # add urls from cache files.  each cache file is read inside a with
    # block so that its handle is closed as soon as its lines have been
    # consumed instead of being left open
    for cache in options.input_cache:
        with open(cache) as cachefile:
            urls.extend(CacheEntry(line).url for line in cachefile)

    # strip table names
    if options.table:
        options.table = list(map(ligolw.Table.TableName, options.table))

    # strip array names
    if options.array:
        options.array = list(map(ligolw.Array.ArrayName, options.array))

    # turn row requests into a segment list.  the original exception is
    # chained so that the underlying parse failure stays in the traceback
    try:
        options.rows = segmentsUtils.from_range_strings(options.rows.split(","))
    except ValueError as e:
        raise ValueError("invalid rowspec: %s" % str(e)) from e

    # success.  [None] stands in for an empty URL list
    return options, (urls or [None])


#
# =============================================================================
#
#                         How to find things to print
#
# =============================================================================
#


#
# How to print a table
#


def print_table(table_elem, columns, rows):
    """
    Print the selected rows of table_elem to stdout, one line per row.

    columns is a sequence of column names to print.  If it is empty or
    None, every name in table_elem.columnnames is printed.  rows is any
    object supporting "index in rows";  a row is printed when its 0-based
    position in the table is in rows.

    Each value is converted to text with the ligolw_types.FormatFunc
    entry for its column's Type.  A value of None is printed as an empty
    field.  Fields are joined with the module-level options.delimiter.
    """
    names = columns if columns else table_elem.columnnames

    # one formatter per column, looked up once before any row is visited
    formatters = tuple(
        ligolw_types.FormatFunc[table_elem.getColumnByName(name).Type]
        for name in names
    )

    for index, row in enumerate(table_elem):
        if index not in rows:
            continue
        fields = []
        for name, formatter in zip(names, formatters):
            value = getattr(row, name)
            fields.append("" if value is None else formatter(value))
        print(options.delimiter.join(fields))


#
# How to print an array
#


def print_array(array_elem):
    """
    Print the contents of array_elem.array to stdout.

    A one-dimensional array is printed one element per line, as the
    repr() of each element.  A two-dimensional array is transposed, and
    each row of the transposed array is printed as one line with the
    repr() of its elements joined by the module-level options.delimiter.

    Raises ValueError for any other number of dimensions.
    """
    data = array_elem.array
    ndim = len(data.shape)

    if ndim == 1:
        # one element per output line
        for element in data:
            print(repr(element))
        return

    if ndim == 2:
        # iterate over the transposed array:  one output line for each
        # index along the input's second axis
        for line in data.transpose():
            print(options.delimiter.join(repr(element) for element in line))
        return

    # anything that is neither one- nor two-dimensional
    raise ValueError("array has more than 2 dimensions")


#
# =============================================================================
#
#                                     Main
#
# =============================================================================
#


# options is read as a module-level global by print_table() and
# print_array();  urls is the list of inputs to process
options, urls = parse_command_line()


#
# print string values as-is rather than quoted:  every string type's
# formatter is replaced with plain str
#


ligolw_types.FormatFunc.update(dict.fromkeys(ligolw_types.StringTypes, str))


#
# Enable appropriate level of document parsing.  If specific table or array
# names have been asked for, don't parse other tables or arrays so as to
# improve parsing speed and reduce memory requirements.  Because we do this,
# we can assume later that we should print all the tables and arrays that
# can be found in the document.
#


if not (options.table or options.array):
    # nothing specific was requested:  parse the entire document
    ContentHandler = ligolw.LIGOLWContentHandler
else:

    def _element_requested(name, attrs):
        """
        Element filter handed to PartialLIGOLWContentHandler.

        name is the element's tag name and attrs its attributes.
        Returns True for a Table element whose name was requested with
        --table, or for an Array element whose name was requested with
        --array.  Each kind of element is checked only against its own
        list, so a Table whose name happens to equal a requested array
        name (or the reverse) is not selected.
        """
        if name == ligolw.Table.tagName:
            return ligolw.Table.TableName(attrs["Name"]) in options.table
        if name == ligolw.Array.tagName:
            return ligolw.Array.ArrayName(attrs["Name"]) in options.array
        return False

    class ContentHandler(ligolw.PartialLIGOLWContentHandler):
        """
        Content handler restricted to the requested tables and arrays.
        """

        def __init__(self, xmldoc):
            super().__init__(xmldoc, _element_requested)


if options.constrain_lsc_tables:
    # --constrain-lsc-tables was given.  the module is not referenced by
    # name anywhere below;  it is imported only for the effects that the
    # import itself has
    import igwn_ligolw.lsctables  # noqa: F401, imported for side effects only


#
# If specific columns have been requested, don't load any others.
#


# options.column stays None unless at least one --column option was given.
# the assignment is made on the ligolw.Table class itself, not on one
# table instance
if options.column is not None:
    ligolw.Table.loadcolumns = set(options.column)


#
# Loop over input URLs
#


for url in urls:
    # load one document using the content handler selected above
    xmldoc = ligolw_utils.load_url(
        url, contenthandler=ContentHandler, verbose=options.verbose
    )

    # print every table and array element found in the document, in
    # the order in which WalkChildren() yields them
    for elem in ligolw.WalkChildren(xmldoc):
        tag = elem.tagName
        if tag == ligolw.Array.tagName:
            print_array(elem)
        elif tag == ligolw.Table.tagName:
            print_table(elem, options.column, options.rows)

    # finished with this document
    xmldoc.unlink()
