001/*
002 *    GeoAPI - Java interfaces for OGC/ISO standards
003 *    Copyright © 2018-2023 Open Geospatial Consortium, Inc.
004 *    http://www.geoapi.org
005 *
006 *    Licensed under the Apache License, Version 2.0 (the "License");
007 *    you may not use this file except in compliance with the License.
008 *    You may obtain a copy of the License at
009 *
010 *        http://www.apache.org/licenses/LICENSE-2.0
011 *
012 *    Unless required by applicable law or agreed to in writing, software
013 *    distributed under the License is distributed on an "AS IS" BASIS,
014 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 *    See the License for the specific language governing permissions and
016 *    limitations under the License.
017 */
018package org.opengis.geoapi.schema;
019
020
021/**
 * Style of the documentation to store. Documentation in XSD files is not written as sentences:
 * it begins with a lower-case letter instead of an upper-case one and does not finish
 * with a period. That documentation can be read verbatim, or transformed into sentences.
025 *
026 * @author  Martin Desruisseaux (Geomatys)
027 * @since   3.1
028 * @version 3.1
029 */
public enum DocumentationStyle {
    /**
     * Skip documentation. The {@link SchemaInformation.Element#documentation} field will be {@code null}.
     * This style can be used when documentation is not needed.
     */
    NONE,

    /**
     * Store documentation verbatim, without transforming it into sentences.
     */
    VERBATIM,

    /**
     * Transform documentation into sentences.
     */
    SENTENCE;

    /**
     * Prefix to omit at the beginning of a sentence. Some XSD files begin their documentation with
     * {@code "Description:"}, which is not necessary. The prefix is matched ignoring case.
     */
    private static final String OMIT = "Description:";

    /**
     * Headers of the parts to ignore in the description given in XSD files.
     * A header is recognized (ignoring case) when it appears right after whitespace
     * in the trailing part of the documentation; the text is cut before it.
     *
     * @todo store this information in a separate map.
     */
    private static final String[] IGNORE = {
        "FGDC:",
        "Position:",
        "Postion:",         // Typo found in some XSD.
        "shortName:",
        "Conditional",
        "NITF_ACFTA:",
        "Note in 19115-3"
    };

    /**
     * Known typos in XSD files. Values at even indexes are the typos
     * and values at odd indexes are the fixes. The array length must be even.
     *
     * @see <a href="https://github.com/opengeospatial/geoapi/pull/42">Issue #42</a>
     */
    private static final String[] TYPOS = {
        "avaialble",    "available",
        "desimination", "dissemination",
        "identifer",    "identifier",
        "occurance",    "occurrence",
        "occurence",    "occurrence",
        "occured",      "occurred",
        "recieve",      "receive",
        "temportal",    "temporal"
    };

    /**
     * Returns the index {@literal >=} {@code from} of the first non-whitespace character.
     * If only whitespaces remain, the returned value is {@code doc.length()}.
     *
     * @param  doc   the documentation declared in the XSD file.
     * @param  from  index of the first character to examine in {@code doc}.
     * @return index of the first character to use in {@code doc}.
     */
    private static int skipLeadingWhitespaces(final String doc, int from) {
        while (from < doc.length()) {
            final int c = doc.codePointAt(from);
            if (!Character.isWhitespace(c)) break;
            from += Character.charCount(c);         // Advance by 1 or 2 chars (supplementary code points).
        }
        return from;
    }

    /**
     * Returns the index {@literal <=} {@code from} just after the last non-whitespace character.
     * If only whitespaces precede {@code from}, the returned value is 0.
     *
     * @param  doc   the documentation declared in the XSD file.
     * @param  from  index after the last character to examine in {@code doc}.
     * @return index after the last character to use in {@code doc}.
     */
    private static int skipTrailingWhitespaces(final String doc, int from) {
        while (from > 0) {
            final int c = doc.codePointBefore(from);
            if (!Character.isWhitespace(c)) break;
            from -= Character.charCount(c);         // Step back by 1 or 2 chars (supplementary code points).
        }
        return from;
    }

    /**
     * Transforms the given documentation from XSD file into a sentence.
     * The leading whitespaces and the {@code "Description:"} prefix are skipped, trailing annexes
     * are omitted, the first letter is converted to upper case, a final period is added if missing,
     * consecutive spaces are collapsed, a period is inserted before the first {@code " NOTE: "}
     * if missing, and known typos are fixed. See class javadoc for details.
     *
     * <p>The sentence is built in the given buffer, which is not cleared by this method.</p>
     *
     * @param  doc     documentation from XSD file.
     * @param  buffer  temporary buffer. Must be initially empty.
     * @return the sentence, or {@code null} if none.
     */
    static String sentence(final String doc, final StringBuilder buffer) {
        /*
         * Skip leading whitespaces and "Description:" prefix (if any),
         * then omit "annexes" on new lines after the main description.
         * If the result is an empty string, use null for "no documentation".
         */
        int startAt = skipLeadingWhitespaces(doc, 0);
        if (doc.regionMatches(true, startAt, OMIT, 0, OMIT.length())) {
            // The prefix begins at `startAt`, not necessarily at index 0: resume right after it.
            startAt = skipLeadingWhitespaces(doc, startAt + OMIT.length());
        }
        final int stopAt = beforeAnnexes(doc);
        if (startAt >= stopAt) {
            return null;
        }
        /*
         * At this point we know the sub-string to use for documentation.
         * Copy as a sentence (upper-case first letter, final period).
         */
        final int firstChar = doc.codePointAt(startAt);
        buffer.appendCodePoint(Character.toUpperCase(firstChar))
              .append(doc, startAt + Character.charCount(firstChar), stopAt);
        if (doc.charAt(stopAt - 1) != '.') {
            buffer.append('.');
        }
        // Replace multi-spaces by a single space. The search restarts at the same
        // index after each deletion, so runs of 3 or more spaces are fully collapsed.
        for (int i=0; (i = buffer.indexOf("  ", i)) >= 0;) {
            buffer.deleteCharAt(i);
        }
        // Documentation in XSD are not sentences. Make it a sentence:
        // terminate the text preceding the first "NOTE:" with a period.
        int i = buffer.indexOf(" NOTE: ");
        if (i > 0 && buffer.charAt(i-1) != '.') {
            buffer.insert(i, '.');
        }
        /*
         * Fix typos. Each iteration consumes a (typo, fix) pair.
         * The search resumes after the inserted fix.
         */
        for (int t=0; t<TYPOS.length;) {
            final String typo = TYPOS[t++];
            final String fix  = TYPOS[t++];
            i = buffer.indexOf(typo);
            while (i >= 0) {
                buffer.replace(i, i + typo.length(), fix);
                i = buffer.indexOf(typo, i + fix.length());
            }
        }
        return buffer.toString();
    }

    /**
     * {@return the index after the last character to keep in the given documentation}.
     * This method cuts the documentation before trailing "shortName" and other annexes.
     * The text is scanned backward; each time a whitespace is followed by one of the
     * {@link #IGNORE} headers, the documentation is cut before that whitespace.
     * The scan stops at the first end of line which is not followed by such header.
     *
     * @param  doc  the documentation declared in the XSD file.
     */
    private static int beforeAnnexes(final String doc) {
        int stopAt = doc.length();
nextLn: for (int pos = stopAt; --pos >= 0;) {
            final int c = doc.charAt(pos);
            final boolean isEOL = (c == '\r' || c == '\n');
            if (isEOL || Character.isWhitespace(c)) {
                // Index of the first non-blank character following the whitespace at `pos`.
                final int lineStart = skipLeadingWhitespaces(doc, pos);
                for (final String header : IGNORE) {
                    if (doc.regionMatches(true, lineStart, header, 0, header.length())) {
                        // Annex found: cut before it and continue the backward scan from there.
                        stopAt = pos = skipTrailingWhitespaces(doc, pos);
                        continue nextLn;
                    }
                }
                // A line which is not an annex: everything before it belongs to the description.
                if (isEOL) break;
            }
        }
        return skipTrailingWhitespaces(doc, stopAt);
    }
}